# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
"""Module implementing the master-side code."""
# pylint: disable-msg=W0201,C0302
# W0201 since most LU attributes are defined in CheckPrereq or similar
# C0302: since we have waaaay too many lines in this module
from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

import ganeti.masterd.instance # pylint: disable-msg=W0611
  """Data container for LU results with jobs.
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  def __init__(self, jobs, **kwargs):
    """Initializes this class.
    Additional return values can be specified as keyword arguments.
    @type jobs: list of lists of L{opcode.OpCode}
    @param jobs: A list of lists of opcode objects
class LogicalUnit(object):
  """Logical Unit base class.
  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
  Note that all commands require root permissions.
  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)
  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.
    This needs to be overridden in derived classes in order to check op
    self.proc = processor
    self.cfg = context.cfg
    self.glm = context.glm
    self.owned_locks = context.glm.list_owned
    self.context = context
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0
    # Validate opcode parameters and set defaults
    self.op.Validate(True)
    self.CheckArguments()
  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.
    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:
      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possible
    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.
  def ExpandNames(self):
    """Expand names for this LU.
    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.
    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value
    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.
    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.
      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      self.needed_locks = {} # No, you can't leave it to the default value None
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
      self.needed_locks = {} # Exclusive LUs don't need locks.
      raise NotImplementedError
  def DeclareLocks(self, level):
    """Declare LU locking needs for a level
    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.
    This function is only called if you have something already set in
    self.needed_locks for the level.
    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS
  def CheckPrereq(self):
    """Check prerequisites for this LU.
    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.
    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
  def Exec(self, feedback_fn):
    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
      raise NotImplementedError
  def BuildHooksEnv(self):
    """Build hooks environment for this LU.
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
    raise NotImplementedError
  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.
    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. If no nodes are to be returned, an
      empty list should be used (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
    raise NotImplementedError
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.
    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
      L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
    @return: the new Exec result, based on the previous result
    # API must be kept, thus we ignore the unused argument and "could
    # be a function" warnings
    # pylint: disable-msg=W0613,R0201
  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.
    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    if self.needed_locks is None:
      self.needed_locks = {}
    assert locking.LEVEL_INSTANCE not in self.needed_locks, \
      "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
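
  # Illustrative sketch (not part of the original source): an instance-level
  # LU would typically call the helper above from its ExpandNames, e.g.:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     # ... declare any other lock levels this LU needs ...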
  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.
    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].
    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.
    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.
    It should be called in DeclareLocks in a way similar to::
      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()
    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"
    # TODO: check if we've really been called with the instance locks held
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
    del self.recalculate_locks[locking.LEVEL_NODE]
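
  # Illustrative sketch (not part of the original source): combined with
  # _ExpandAndLockInstance above, the usual locking pattern of a hypothetical
  # instance LU looks roughly like this:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()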
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.
  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.
  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.
    This just raises an error.
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.
    raise AssertionError("BuildHooksNodes called for NoHooksLU")

  """Tasklet base class.
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.
  Subclasses must follow these rules:
    - Implement CheckPrereq
  def __init__(self, lu):
  def CheckPrereq(self):
    """Check prerequisites for this tasklet.
    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.
    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.
    This method should also update all parameters to their canonical form if it
    hasn't been done before.
  def Exec(self, feedback_fn):
    """Execute the tasklet.
    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    raise NotImplementedError

  """Base for query utility classes.
  #: Attribute holding field definitions
  def __init__(self, filter_, fields, use_locking):
    """Initializes this class.
    self.use_locking = use_locking
    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()
    # Sort only if no names were requested
    self.sort_by_name = not self.names
    self.do_locking = None
  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.
      names = lu.owned_locks(lock_level)
    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)
    # caller specified names and we must keep the same order
    assert not self.do_locking or lu.glm.is_owned(lock_level)
    missing = set(self.wanted).difference(names)
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)
    # Return expanded names
  def ExpandNames(self, lu):
    """Expand names for this query.
    See L{LogicalUnit.ExpandNames}.
    raise NotImplementedError()
  def DeclareLocks(self, lu, level):
    """Declare locks for this query.
    See L{LogicalUnit.DeclareLocks}.
    raise NotImplementedError()
  def _GetQueryData(self, lu):
    """Collects all data for this query.
    @return: Query data object
    raise NotImplementedError()
  def NewStyleQuery(self, lu):
    """Collect data and execute query.
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)
  def OldStyleQuery(self, lu):
    """Collect data and execute query.
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)

  """Returns a dict declaring all lock levels shared.
  return dict.fromkeys(locking.LEVELS, 1)
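
# Illustrative note (not part of the original source): the dict returned
# above is meant to be assigned to an LU's share_locks attribute, e.g. in
# ExpandNames:
#
#   self.share_locks = _ShareAll()
#   self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}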
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.
  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups"
                               " are '%s', owning groups '%s'; retry the"
                               utils.CommaJoin(inst_groups),
                               utils.CommaJoin(owned_groups)),

def _SupportsOob(cfg, node):
  """Tells if node supports OOB.
  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]

def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.
  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @param nodes: list of node names or None for all nodes
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(lu.cfg.GetNodeList())

def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.
  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())

def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.
  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @return: the new parameter dictionary
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
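
# Illustrative example (not part of the original source), with assumed toy
# values, showing the merge semantics of _GetUpdatedParams:
#
#   old = {"mem": 512, "vcpus": 2}
#   upd = {"mem": constants.VALUE_DEFAULT, "vcpus": 4, "boot_order": "cd"}
#   _GetUpdatedParams(old, upd)
#   --> {"vcpus": 4, "boot_order": "cd"}   # "mem" reverts to its default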
def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.
  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"
  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None
    # Determine which locks to release
    for name in lu.owned_locks(level):
      if should_release(name):
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
    # Release just some locks
    lu.glm.release(level, names=release)
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
    lu.glm.release(level)
    assert not lu.glm.is_owned(level), "No locks should be owned"
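
# Illustrative examples (not part of the original source) of how the helper
# above is typically invoked once an LU no longer needs some of its locks:
#
#   # keep only the lock on the instance's primary node
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[instance.primary_node])
#
#   # drop all node locks still held by this LU
#   _ReleaseLocks(self, locking.LEVEL_NODE)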
def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.
  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
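
# Illustrative note (not part of the original source): for two assumed
# instances "inst1" (LV "xenvg/disk0" on "node1") and "inst2" (LV
# "xenvg/disk0" on "node2"), the mapping built above would be:
#
#   {("node1", "xenvg/disk0"): "inst1",
#    ("node2", "xenvg/disk0"): "inst2"}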
def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable-msg=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)

def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.
  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set
  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)

def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.
  This will ensure that instances don't get customised versions of
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.
  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)

def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.
  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,

def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.
  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,

def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.
  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)

def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.
  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,

def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)
  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)
  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

def _ExpandItemName(fn, name, kind):
  """Expand an item name.
  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),

def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")

def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")

def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks
  This builds the hook environment from individual variables.
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @param memory: the memory size of the instance
  @param vcpus: the count of VCPUs the instance has
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @param disks: the list of (size, mode) pairs
  @param bep: the backend parameters for the instance
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @param tags: list of instance tags as strings
  @return: the hook environment for this instance
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  nic_count = len(nics)
  for idx, (ip, mac, mode, link) in enumerate(nics):
    env["INSTANCE_NIC%d_IP" % idx] = ip
    env["INSTANCE_NIC%d_MAC" % idx] = mac
    env["INSTANCE_NIC%d_MODE" % idx] = mode
    env["INSTANCE_NIC%d_LINK" % idx] = link
    if mode == constants.NIC_MODE_BRIDGED:
      env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  env["INSTANCE_NIC_COUNT"] = nic_count
  disk_count = len(disks)
  for idx, (size, mode) in enumerate(disks):
    env["INSTANCE_DISK%d_SIZE" % idx] = size
    env["INSTANCE_DISK%d_MODE" % idx] = mode
  env["INSTANCE_DISK_COUNT"] = disk_count
  env["INSTANCE_TAGS"] = " ".join(tags)
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value
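
# Illustrative note (not part of the original source): for an assumed
# single-NIC, single-disk instance, the code above produces environment keys
# such as INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_NIC_COUNT=1,
# INSTANCE_NIC0_IP, INSTANCE_NIC0_MAC, INSTANCE_DISK_COUNT=1,
# INSTANCE_DISK0_SIZE, INSTANCE_DISK0_MODE, plus one INSTANCE_BE_<param> and
# INSTANCE_HV_<param> entry per backend/hypervisor parameter. The hooks
# runner later prefixes every key with "GANETI_".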
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.
  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples
  cluster = lu.cfg.GetClusterInfo()
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))

def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.
  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
  @type override: dict
  @param override: dictionary with key/values that will override
  @return: the hook environment dictionary
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_up,
    "memory": bep[constants.BE_MEMORY],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142

def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %

def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
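
# Illustrative example (not part of the original source), with assumed
# numbers: if candidate_pool_size is 10 and the cluster currently has
# mc_now == 3 candidates with mc_should == 3, adding this node raises
# mc_should to min(3 + 1, 10) == 4; since 3 < 4, the node decides to promote
# itself to master candidate.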
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)

def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)

def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.
  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @param name: OS name passed by the user, to check for validity
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]

def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)

def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)

def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)

def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:

def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.
  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.
  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)
  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"

def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.
  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @return: Iallocator name
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()
  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  def BuildHooksEnv(self):
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }
  def BuildHooksNodes(self):
    """Build hooks nodes.
    return ([], [self.cfg.GetMasterNode()])
  def Exec(self, feedback_fn):

class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  def BuildHooksEnv(self):
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }
  def BuildHooksNodes(self):
    """Build hooks nodes.
  def CheckPrereq(self):
    """Check prerequisites.
    This checks whether the cluster is empty.
    Any errors are signaled by raising errors.OpPrereqError.
    master = self.cfg.GetMasterNode()
    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
  def Exec(self, feedback_fn):
    """Destroys the cluster.
    master = self.cfg.GetMasterNode()
    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.
  @type filename: string
  @param filename: Path to PEM file
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))
  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)
  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)

def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.
  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
      apply to, and the origin (can be "cluster", "os X", or "instance Y")
  hvp_data = []
  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
      hvp_data.append(("os %s" % os_name, hv_name, full_params))
  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))
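
# Illustrative note (not part of the original source): the list built above
# contains (origin, hypervisor, parameters) tuples, e.g. for an assumed
# cluster:
#
#   [("cluster", "xen-pvm", {...cluster-wide defaults...}),
#    ("os debian-image", "xen-pvm", {...OS-specific overrides...}),
#    ("instance web1.example.com", "xen-pvm", {...fully filled values...})]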
class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"
  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.
    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.
    This must be called only from Exec and functions called from Exec.
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    # first complete the msg
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable-msg=E1101
  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.
  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}
  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.
    self.bad = False
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying cluster config")
    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
    feedback_fn("* Verifying cluster certificate files")
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
    feedback_fn("* Verifying hypervisor parameters")
    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))
    feedback_fn("* Verifying all nodes belong to an existing group")
    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.
    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)
    dangling_instances = {}
    no_node_instances = []
    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)
    pretty_dangling = [
        "%s (%s)" % (node.name,
                     utils.CommaJoin(dangling_instances.get(node.name,
                                                            ["no instances"])))
        for node in dangling_nodes]
    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))
    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))
    return (not self.bad, [g.name for g in self.all_group_info.values()])

class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _HOOKS_INDENT_RE = re.compile("^", re.M)
  class NodeImage(object):
    """A class representing the logical and physical status of a node.
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.os_fail = False
  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }
    self.share_locks = _ShareAll()
  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
      all_inst_info = self.cfg.GetAllInstancesInfo()
      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)
      self.needed_locks[locking.LEVEL_NODE] = nodes
  def CheckPrereq(self):
    group_nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
    unlocked_nodes = \
      group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
    unlocked_instances = \
      group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))
    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)
    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)
    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)
    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()
    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)
    unlocked_lv_nodes = \
      extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
    if unlocked_lv_nodes:
      raise errors.OpPrereqError("these nodes could be locked: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)
  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.
      - check the result data structure is well formed and has all the
      - check ganeti version
    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    # node seems compatible, we can actually try to look into its results
    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)
    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.
    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.
    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.
    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, self.ENODENET, node,
             "did not return valid bridge information")
    _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
             utils.CommaJoin(sorted(missing)))
  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.
    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)
    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])
    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)
1926 def _VerifyInstance(self, instance, instanceconfig, node_image,
1928 """Verify an instance.
1930 This function checks to see if the required block devices are
1931 available on the instance's node.
1934 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1935 node_current = instanceconfig.primary_node
1937 node_vol_should = {}
1938 instanceconfig.MapLVsByNode(node_vol_should)
1940 for node in node_vol_should:
1941 n_img = node_image[node]
1942 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1943 # ignore missing volumes on offline or broken nodes
1945 for volume in node_vol_should[node]:
1946 test = volume not in n_img.volumes
1947 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1948 "volume %s missing on node %s", volume, node)
1950 if instanceconfig.admin_up:
1951 pri_img = node_image[node_current]
1952 test = instance not in pri_img.instances and not pri_img.offline
1953 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1954 "instance not running on its primary node %s",
1957 diskdata = [(nname, success, status, idx)
1958 for (nname, disks) in diskstatus.items()
1959 for idx, (success, status) in enumerate(disks)]
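# For instances marked as running, report disks whose status could not be
# retrieved (unless the node is a ghost or offline) and disks reported as faulty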
1961 for nname, success, bdev_status, idx in diskdata:
1962 # the 'ghost node' construction in Exec() ensures that we have a
1964 snode = node_image[nname]
1965 bad_snode = snode.ghost or snode.offline
1966 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1967 self.EINSTANCEFAULTYDISK, instance,
1968 "couldn't retrieve status for disk/%s on %s: %s",
1969 idx, nname, bdev_status)
1970 _ErrorIf((instanceconfig.admin_up and success and
1971 bdev_status.ldisk_status == constants.LDS_FAULTY),
1972 self.EINSTANCEFAULTYDISK, instance,
1973 "disk/%s on %s is faulty", idx, nname)
1975 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1976 """Verify if there are any unknown volumes in the cluster.
1978 The .os, .swap and backup volumes are ignored. All other volumes are
1979 reported as unknown.
1981 @type reserved: L{ganeti.utils.FieldSet}
1982 @param reserved: a FieldSet of reserved volume names
1985 for node, n_img in node_image.items():
1986 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1987 # skip non-healthy nodes
1989 for volume in n_img.volumes:
1990 test = ((node not in node_vol_should or
1991 volume not in node_vol_should[node]) and
1992 not reserved.Matches(volume))
1993 self._ErrorIf(test, self.ENODEORPHANLV, node,
1994 "volume %s is unknown", volume)
1996 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1997 """Verify N+1 Memory Resilience.
1999 Check that if one single node dies we can still start all the
2000 instances it was primary for.
2003 cluster_info = self.cfg.GetClusterInfo()
2004 for node, n_img in node_image.items():
2005 # This code checks that every node which is now listed as
2006 # secondary has enough memory to host all instances it is
2007 # supposed to should a single other node in the cluster fail.
2008 # FIXME: not ready for failover to an arbitrary node
2009 # FIXME: does not support file-backed instances
2010 # WARNING: we currently take into account down instances as well
2011 # as up ones, considering that even if they're down someone
2012 # might want to start them even in the event of a node failure.
2014 # we're skipping offline nodes from the N+1 warning, since
2015 # most likely we don't have good memory information from them;
2016 # we already list instances living on such nodes, and that's enough warning
2019 for prinode, instances in n_img.sbp.items():
2021 for instance in instances:
2022 bep = cluster_info.FillBE(instance_cfg[instance])
2023 if bep[constants.BE_AUTO_BALANCE]:
2024 needed_mem += bep[constants.BE_MEMORY]
2025 test = n_img.mfree < needed_mem
2026 self._ErrorIf(test, self.ENODEN1, node,
2027 "not enough memory to accomodate instance failovers"
2028 " should node %s fail (%dMiB needed, %dMiB available)",
2029 prinode, needed_mem, n_img.mfree)
2032 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2033 (files_all, files_all_opt, files_mc, files_vm)):
2034 """Verifies file checksums collected from all nodes.
2036 @param errorif: Callback for reporting errors
2037 @param nodeinfo: List of L{objects.Node} objects
2038 @param master_node: Name of master node
2039 @param all_nvinfo: RPC results
2042 node_names = frozenset(node.name for node in nodeinfo if not node.offline)
2044 assert master_node in node_names
2045 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2046 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2047 "Found file listed in more than one file list"
2049 # Define functions determining which nodes to consider for a file
2050 file2nodefn = dict([(filename, fn)
2051 for (files, fn) in [(files_all, None),
2052 (files_all_opt, None),
2053 (files_mc, lambda node: (node.master_candidate or
2054 node.name == master_node)),
2055 (files_vm, lambda node: node.vm_capable)]
2056 for filename in files])
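# fileinfo maps each filename to a dict of checksum -> set of node names reporting it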
2058 fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
2060 for node in nodeinfo:
2064 nresult = all_nvinfo[node.name]
2066 if nresult.fail_msg or not nresult.payload:
2069 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2071 test = not (node_files and isinstance(node_files, dict))
2072 errorif(test, cls.ENODEFILECHECK, node.name,
2073 "Node did not return file checksum data")
2077 for (filename, checksum) in node_files.items():
2078 # Check if the file should be considered for a node
2079 fn = file2nodefn[filename]
2080 if fn is None or fn(node):
2081 fileinfo[filename].setdefault(checksum, set()).add(node.name)
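# With all checksums collected, report files that are missing from nodes or
# present with inconsistent contents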
2083 for (filename, checksums) in fileinfo.items():
2084 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2086 # Nodes having the file
2087 with_file = frozenset(node_name
2088 for nodes in fileinfo[filename].values()
2089 for node_name in nodes)
2091 # Nodes missing file
2092 missing_file = node_names - with_file
2094 if filename in files_all_opt:
2096 errorif(missing_file and missing_file != node_names,
2097 cls.ECLUSTERFILECHECK, None,
2098 "File %s is optional, but it must exist on all or no"
2099 " nodes (not found on %s)",
2100 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2102 errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2103 "File %s is missing from node(s) %s", filename,
2104 utils.CommaJoin(utils.NiceSort(missing_file)))
2106 # See if there are multiple versions of the file
2107 test = len(checksums) > 1
2109 variants = ["variant %s on %s" %
2110 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2111 for (idx, (checksum, nodes)) in
2112 enumerate(sorted(checksums.items()))]
2116 errorif(test, cls.ECLUSTERFILECHECK, None,
2117 "File %s found with %s different checksums (%s)",
2118 filename, len(checksums), "; ".join(variants))
2120 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2122 """Verifies and the node DRBD status.
2124 @type ninfo: L{objects.Node}
2125 @param ninfo: the node to check
2126 @param nresult: the remote results for the node
2127 @param instanceinfo: the dict of instances
2128 @param drbd_helper: the configured DRBD usermode helper
2129 @param drbd_map: the DRBD map as returned by
2130 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2134 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2137 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2138 test = (helper_result is None)
2139 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2140 "no drbd usermode helper returned")
2142 status, payload = helper_result
2144 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2145 "drbd usermode helper check unsuccessful: %s", payload)
2146 test = status and (payload != drbd_helper)
2147 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2148 "wrong drbd usermode helper: %s", payload)
2150 # compute the DRBD minors
2152 for minor, instance in drbd_map[node].items():
2153 test = instance not in instanceinfo
2154 _ErrorIf(test, self.ECLUSTERCFG, None,
2155 "ghost instance '%s' in temporary DRBD map", instance)
2156 # ghost instance should not be running, but otherwise we
2157 # don't give double warnings (both ghost instance and
2158 # unallocated minor in use)
2160 node_drbd[minor] = (instance, False)
2162 instance = instanceinfo[instance]
2163 node_drbd[minor] = (instance.name, instance.admin_up)
2165 # and now check them
2166 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2167 test = not isinstance(used_minors, (tuple, list))
2168 _ErrorIf(test, self.ENODEDRBD, node,
2169 "cannot parse drbd status file: %s", str(used_minors))
2171 # we cannot check drbd status
2174 for minor, (iname, must_exist) in node_drbd.items():
2175 test = minor not in used_minors and must_exist
2176 _ErrorIf(test, self.ENODEDRBD, node,
2177 "drbd minor %d of instance %s is not active", minor, iname)
2178 for minor in used_minors:
2179 test = minor not in node_drbd
2180 _ErrorIf(test, self.ENODEDRBD, node,
2181 "unallocated drbd minor %d is in use", minor)
2183 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2184 """Builds the node OS structures.
2186 @type ninfo: L{objects.Node}
2187 @param ninfo: the node to check
2188 @param nresult: the remote results for the node
2189 @param nimg: the node image object
2193 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2195 remote_os = nresult.get(constants.NV_OSLIST, None)
2196 test = (not isinstance(remote_os, list) or
2197 not compat.all(isinstance(v, list) and len(v) == 7
2198 for v in remote_os))
2200 _ErrorIf(test, self.ENODEOS, node,
2201 "node hasn't returned valid OS data")
2210 for (name, os_path, status, diagnose,
2211 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2213 if name not in os_dict:
2216 # parameters is a list of lists instead of list of tuples due to
2217 # JSON lacking a real tuple type, fix it:
2218 parameters = [tuple(v) for v in parameters]
2219 os_dict[name].append((os_path, status, diagnose,
2220 set(variants), set(parameters), set(api_ver)))
2222 nimg.oslist = os_dict
2224 def _VerifyNodeOS(self, ninfo, nimg, base):
2225 """Verifies the node OS list.
2227 @type ninfo: L{objects.Node}
2228 @param ninfo: the node to check
2229 @param nimg: the node image object
2230 @param base: the 'template' node we match against (e.g. from the master)
2234 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2236 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2238 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2239 for os_name, os_data in nimg.oslist.items():
2240 assert os_data, "Empty OS status for OS %s?!" % os_name
2241 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2242 _ErrorIf(not f_status, self.ENODEOS, node,
2243 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2244 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2245 "OS '%s' has multiple entries (first one shadows the rest): %s",
2246 os_name, utils.CommaJoin([v[0] for v in os_data]))
2247 # comparisons with the 'base' image
2248 test = os_name not in base.oslist
2249 _ErrorIf(test, self.ENODEOS, node,
2250 "Extra OS %s not present on reference node (%s)",
2254 assert base.oslist[os_name], "Base node has empty OS status?"
2255 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2257 # base OS is invalid, skipping
2259 for kind, a, b in [("API version", f_api, b_api),
2260 ("variants list", f_var, b_var),
2261 ("parameters", beautify_params(f_param),
2262 beautify_params(b_param))]:
2263 _ErrorIf(a != b, self.ENODEOS, node,
2264 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2265 kind, os_name, base.name,
2266 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2268 # check any missing OSes
2269 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2270 _ErrorIf(missing, self.ENODEOS, node,
2271 "OSes present on reference node %s but missing on this node: %s",
2272 base.name, utils.CommaJoin(missing))
2274 def _VerifyOob(self, ninfo, nresult):
2275 """Verifies out of band functionality of a node.
2277 @type ninfo: L{objects.Node}
2278 @param ninfo: the node to check
2279 @param nresult: the remote results for the node
2283 # We just have to verify the paths on master and/or master candidates
2284 # as the oob helper is invoked on the master
2285 if ((ninfo.master_candidate or ninfo.master_capable) and
2286 constants.NV_OOB_PATHS in nresult):
2287 for path_result in nresult[constants.NV_OOB_PATHS]:
2288 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2290 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2291 """Verifies and updates the node volume data.
2293 This function will update a L{NodeImage}'s internal structures
2294 with data from the remote call.
2296 @type ninfo: L{objects.Node}
2297 @param ninfo: the node to check
2298 @param nresult: the remote results for the node
2299 @param nimg: the node image object
2300 @param vg_name: the configured VG name
2304 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2306 nimg.lvm_fail = True
2307 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2310 elif isinstance(lvdata, basestring):
2311 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2312 utils.SafeEncode(lvdata))
2313 elif not isinstance(lvdata, dict):
2314 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2316 nimg.volumes = lvdata
2317 nimg.lvm_fail = False
2319 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2320 """Verifies and updates the node instance list.
2322 If the listing was successful, then updates this node's instance
2323 list. Otherwise, it marks the RPC call as failed for the instance list.
2326 @type ninfo: L{objects.Node}
2327 @param ninfo: the node to check
2328 @param nresult: the remote results for the node
2329 @param nimg: the node image object
2332 idata = nresult.get(constants.NV_INSTANCELIST, None)
2333 test = not isinstance(idata, list)
2334 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2335 " (instancelist): %s", utils.SafeEncode(str(idata)))
2337 nimg.hyp_fail = True
2339 nimg.instances = idata
2341 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2342 """Verifies and computes a node information map
2344 @type ninfo: L{objects.Node}
2345 @param ninfo: the node to check
2346 @param nresult: the remote results for the node
2347 @param nimg: the node image object
2348 @param vg_name: the configured VG name
2352 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2354 # try to read free memory (from the hypervisor)
2355 hv_info = nresult.get(constants.NV_HVINFO, None)
2356 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2357 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2360 nimg.mfree = int(hv_info["memory_free"])
2361 except (ValueError, TypeError):
2362 _ErrorIf(True, self.ENODERPC, node,
2363 "node returned invalid nodeinfo, check hypervisor")
2365 # FIXME: devise a free space model for file based instances as well
2366 if vg_name is not None:
2367 test = (constants.NV_VGLIST not in nresult or
2368 vg_name not in nresult[constants.NV_VGLIST])
2369 _ErrorIf(test, self.ENODELVM, node,
2370 "node didn't return data for the volume group '%s'"
2371 " - it is either missing or broken", vg_name)
2374 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2375 except (ValueError, TypeError):
2376 _ErrorIf(True, self.ENODERPC, node,
2377 "node returned invalid LVM info, check LVM status")
2379 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2380 """Gets per-disk status information for all instances.
2382 @type nodelist: list of strings
2383 @param nodelist: Node names
2384 @type node_image: dict of (name, L{objects.Node})
2385 @param node_image: Node objects
2386 @type instanceinfo: dict of (name, L{objects.Instance})
2387 @param instanceinfo: Instance objects
2388 @rtype: {instance: {node: [(success, payload)]}}
2389 @return: a dictionary of per-instance dictionaries with nodes as
2390 keys and disk information as values; the disk information is a
2391 list of tuples (success, payload)
2394 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2397 node_disks_devonly = {}
2398 diskless_instances = set()
2399 diskless = constants.DT_DISKLESS
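# For every node, collect the disks of all instances it hosts (as primary or
# secondary); diskless instances are tracked separately and get empty entries later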
2401 for nname in nodelist:
2402 node_instances = list(itertools.chain(node_image[nname].pinst,
2403 node_image[nname].sinst))
2404 diskless_instances.update(inst for inst in node_instances
2405 if instanceinfo[inst].disk_template == diskless)
2406 disks = [(inst, disk)
2407 for inst in node_instances
2408 for disk in instanceinfo[inst].disks]
2411 # No need to collect data
2414 node_disks[nname] = disks
2416 # Creating copies as SetDiskID below will modify the objects and that can
2417 # lead to incorrect data returned from nodes
2418 devonly = [dev.Copy() for (_, dev) in disks]
2421 self.cfg.SetDiskID(dev, nname)
2423 node_disks_devonly[nname] = devonly
2425 assert len(node_disks) == len(node_disks_devonly)
2427 # Collect data from all nodes with disks
2428 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2431 assert len(result) == len(node_disks)
2435 for (nname, nres) in result.items():
2436 disks = node_disks[nname]
2439 # No data from this node
2440 data = len(disks) * [(False, "node offline")]
2443 _ErrorIf(msg, self.ENODERPC, nname,
2444 "while getting disk information: %s", msg)
2446 # No data from this node
2447 data = len(disks) * [(False, msg)]
2450 for idx, i in enumerate(nres.payload):
2451 if isinstance(i, (tuple, list)) and len(i) == 2:
2454 logging.warning("Invalid result from node %s, entry %d: %s", nname, idx, i)
2456 data.append((False, "Invalid result from the remote node"))
2458 for ((inst, _), status) in zip(disks, data):
2459 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2461 # Add empty entries for diskless instances.
2462 for inst in diskless_instances:
2463 assert inst not in instdisk
2466 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2467 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2468 compat.all(isinstance(s, (tuple, list)) and
2469 len(s) == 2 for s in statuses)
2470 for inst, nnames in instdisk.items()
2471 for nname, statuses in nnames.items())
2472 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2476 def BuildHooksEnv(self):
2479 Cluster-Verify hooks are run only in the post phase; if they fail, their
2480 output is logged in the verify output and the verification fails.
2484 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2487 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2488 for node in self.my_node_info.values())
2492 def BuildHooksNodes(self):
2493 """Build hooks nodes.
2496 return ([], self.my_node_names)
2498 def Exec(self, feedback_fn):
2499 """Verify integrity of the node group, performing various test on nodes.
2502 # This method has too many local variables. pylint: disable-msg=R0914
2504 if not self.my_node_names:
2506 feedback_fn("* Empty node group, skipping verification")
2510 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2511 verbose = self.op.verbose
2512 self._feedback_fn = feedback_fn
2514 vg_name = self.cfg.GetVGName()
2515 drbd_helper = self.cfg.GetDRBDHelper()
2516 cluster = self.cfg.GetClusterInfo()
2517 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2518 hypervisors = cluster.enabled_hypervisors
2519 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2521 i_non_redundant = [] # Non redundant instances
2522 i_non_a_balanced = [] # Non auto-balanced instances
2523 n_offline = 0 # Count of offline nodes
2524 n_drained = 0 # Count of nodes being drained
2525 node_vol_should = {}
2527 # FIXME: verify OS list
2530 filemap = _ComputeAncillaryFiles(cluster, False)
2532 # do local checksums
2533 master_node = self.master_node = self.cfg.GetMasterNode()
2534 master_ip = self.cfg.GetMasterIP()
2536 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2538 # We will make nodes contact all nodes in their group, and one node from
2539 # every other group.
2540 # TODO: should it be a *random* node, different every time?
2541 online_nodes = [node.name for node in node_data_list if not node.offline]
2542 other_group_nodes = {}
2544 for name in sorted(self.all_node_info):
2545 node = self.all_node_info[name]
2546 if (node.group not in other_group_nodes
2547 and node.group != self.group_uuid
2548 and not node.offline):
2549 other_group_nodes[node.group] = node.name
2551 node_verify_param = {
2552 constants.NV_FILELIST:
2553 utils.UniqueSequence(filename
2554 for files in filemap
2555 for filename in files),
2556 constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
2557 constants.NV_HYPERVISOR: hypervisors,
2558 constants.NV_HVPARAMS:
2559 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2560 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2561 for node in node_data_list
2562 if not node.offline],
2563 constants.NV_INSTANCELIST: hypervisors,
2564 constants.NV_VERSION: None,
2565 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2566 constants.NV_NODESETUP: None,
2567 constants.NV_TIME: None,
2568 constants.NV_MASTERIP: (master_node, master_ip),
2569 constants.NV_OSLIST: None,
2570 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2573 if vg_name is not None:
2574 node_verify_param[constants.NV_VGLIST] = None
2575 node_verify_param[constants.NV_LVLIST] = vg_name
2576 node_verify_param[constants.NV_PVLIST] = [vg_name]
2577 node_verify_param[constants.NV_DRBDLIST] = None
2580 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2583 # FIXME: this needs to be changed per node-group, not cluster-wide
2585 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2586 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2587 bridges.add(default_nicpp[constants.NIC_LINK])
2588 for instance in self.my_inst_info.values():
2589 for nic in instance.nics:
2590 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2591 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2592 bridges.add(full_nic[constants.NIC_LINK])
2595 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2597 # Build our expected cluster state
2598 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2600 vm_capable=node.vm_capable))
2601 for node in node_data_list)
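# Collect the distinct out-of-band helper paths configured for any node so they
# can be verified (see _VerifyOob)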
2605 for node in self.all_node_info.values():
2606 path = _SupportsOob(self.cfg, node)
2607 if path and path not in oob_paths:
2608 oob_paths.append(path)
2611 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2613 for instance in self.my_inst_names:
2614 inst_config = self.my_inst_info[instance]
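# Make sure every node used by the instance has a node image; nodes that are
# not in the cluster configuration at all are marked as ghosts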
2616 for nname in inst_config.all_nodes:
2617 if nname not in node_image:
2618 gnode = self.NodeImage(name=nname)
2619 gnode.ghost = (nname not in self.all_node_info)
2620 node_image[nname] = gnode
2622 inst_config.MapLVsByNode(node_vol_should)
2624 pnode = inst_config.primary_node
2625 node_image[pnode].pinst.append(instance)
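# For each secondary node, record the instance in its 'secondary by primary'
# (sbp) map, keyed by the instance's primary node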
2627 for snode in inst_config.secondary_nodes:
2628 nimg = node_image[snode]
2629 nimg.sinst.append(instance)
2630 if pnode not in nimg.sbp:
2631 nimg.sbp[pnode] = []
2632 nimg.sbp[pnode].append(instance)
2634 # At this point, we have the in-memory data structures complete,
2635 # except for the runtime information, which we'll gather next
2637 # Due to the way our RPC system works, exact response times cannot be
2638 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2639 # time before and after executing the request, we can at least have a time
2641 nvinfo_starttime = time.time()
2642 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2644 self.cfg.GetClusterName())
2645 nvinfo_endtime = time.time()
2647 if self.extra_lv_nodes and vg_name is not None:
2649 self.rpc.call_node_verify(self.extra_lv_nodes,
2650 {constants.NV_LVLIST: vg_name},
2651 self.cfg.GetClusterName())
2653 extra_lv_nvinfo = {}
2655 all_drbd_map = self.cfg.ComputeDRBDMap()
2657 feedback_fn("* Gathering disk information (%s nodes)" %
2658 len(self.my_node_names))
2659 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2662 feedback_fn("* Verifying configuration file consistency")
2664 # If not all nodes are being checked, we need to make sure the master node
2665 # and a non-checked vm_capable node are in the list.
2666 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2668 vf_nvinfo = all_nvinfo.copy()
2669 vf_node_info = list(self.my_node_info.values())
2670 additional_nodes = []
2671 if master_node not in self.my_node_info:
2672 additional_nodes.append(master_node)
2673 vf_node_info.append(self.all_node_info[master_node])
2674 # Add the first vm_capable node we find which is not included
2675 for node in absent_nodes:
2676 nodeinfo = self.all_node_info[node]
2677 if nodeinfo.vm_capable and not nodeinfo.offline:
2678 additional_nodes.append(node)
2679 vf_node_info.append(self.all_node_info[node])
2681 key = constants.NV_FILELIST
2682 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2683 {key: node_verify_param[key]},
2684 self.cfg.GetClusterName()))
2686 vf_nvinfo = all_nvinfo
2687 vf_node_info = self.my_node_info.values()
2689 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2691 feedback_fn("* Verifying node status")
2695 for node_i in node_data_list:
2697 nimg = node_image[node]
2701 feedback_fn("* Skipping offline node %s" % (node,))
2705 if node == master_node:
2707 elif node_i.master_candidate:
2708 ntype = "master candidate"
2709 elif node_i.drained:
2715 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2717 msg = all_nvinfo[node].fail_msg
2718 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2720 nimg.rpc_fail = True
2723 nresult = all_nvinfo[node].payload
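# Run the individual per-node checks and update the node image with the data
# returned by the node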
2725 nimg.call_ok = self._VerifyNode(node_i, nresult)
2726 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2727 self._VerifyNodeNetwork(node_i, nresult)
2728 self._VerifyOob(node_i, nresult)
2731 self._VerifyNodeLVM(node_i, nresult, vg_name)
2732 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2735 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2736 self._UpdateNodeInstances(node_i, nresult, nimg)
2737 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2738 self._UpdateNodeOS(node_i, nresult, nimg)
2740 if not nimg.os_fail:
2741 if refos_img is None:
2743 self._VerifyNodeOS(node_i, nimg, refos_img)
2744 self._VerifyNodeBridges(node_i, nresult, bridges)
2746 # Check whether all running instances are primary for the node. (This
2747 # can no longer be done from _VerifyInstance below, since some of the
2748 # wrong instances could be from other node groups.)
2749 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2751 for inst in non_primary_inst:
2752 test = inst in self.all_inst_info
2753 _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2754 "instance should not run on node %s", node_i.name)
2755 _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2756 "node is running unknown instance %s", inst)
2758 for node, result in extra_lv_nvinfo.items():
2759 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2760 node_image[node], vg_name)
2762 feedback_fn("* Verifying instance status")
2763 for instance in self.my_inst_names:
2765 feedback_fn("* Verifying instance %s" % instance)
2766 inst_config = self.my_inst_info[instance]
2767 self._VerifyInstance(instance, inst_config, node_image,
2769 inst_nodes_offline = []
2771 pnode = inst_config.primary_node
2772 pnode_img = node_image[pnode]
2773 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2774 self.ENODERPC, pnode, "instance %s, connection to"
2775 " primary node failed", instance)
2777 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2778 self.EINSTANCEBADNODE, instance,
2779 "instance is marked as running and lives on offline node %s",
2780 inst_config.primary_node)
2782 # If the instance is non-redundant we cannot survive losing its primary
2783 # node, so we are not N+1 compliant. On the other hand we have no disk
2784 # templates with more than one secondary so that situation is not well supported either.
2786 # FIXME: does not support file-backed instances
2787 if not inst_config.secondary_nodes:
2788 i_non_redundant.append(instance)
2790 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2791 instance, "instance has multiple secondary nodes: %s",
2792 utils.CommaJoin(inst_config.secondary_nodes),
2793 code=self.ETYPE_WARNING)
2795 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2796 pnode = inst_config.primary_node
2797 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2798 instance_groups = {}
2800 for node in instance_nodes:
2801 instance_groups.setdefault(self.all_node_info[node].group,
2805 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2806 # Sort so that we always list the primary node first.
2807 for group, nodes in sorted(instance_groups.items(),
2808 key=lambda (_, nodes): pnode in nodes,
2811 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2812 instance, "instance has primary and secondary nodes in"
2813 " different groups: %s", utils.CommaJoin(pretty_list),
2814 code=self.ETYPE_WARNING)
2816 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2817 i_non_a_balanced.append(instance)
2819 for snode in inst_config.secondary_nodes:
2820 s_img = node_image[snode]
2821 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2822 "instance %s, connection to secondary node failed", instance)
2825 inst_nodes_offline.append(snode)
2827 # warn that the instance lives on offline nodes
2828 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2829 "instance has offline secondary node(s) %s",
2830 utils.CommaJoin(inst_nodes_offline))
2831 # ... or ghost/non-vm_capable nodes
2832 for node in inst_config.all_nodes:
2833 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2834 "instance lives on ghost node %s", node)
2835 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2836 instance, "instance lives on non-vm_capable node %s", node)
2838 feedback_fn("* Verifying orphan volumes")
2839 reserved = utils.FieldSet(*cluster.reserved_lvs)
2841 # We will get spurious "unknown volume" warnings if any node of this group
2842 # is secondary for an instance whose primary is in another group. To avoid
2843 # them, we find these instances and add their volumes to node_vol_should.
2844 for inst in self.all_inst_info.values():
2845 for secondary in inst.secondary_nodes:
2846 if (secondary in self.my_node_info
2847 and inst.name not in self.my_inst_info):
2848 inst.MapLVsByNode(node_vol_should)
2851 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2853 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2854 feedback_fn("* Verifying N+1 Memory redundancy")
2855 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2857 feedback_fn("* Other Notes")
2859 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2860 % len(i_non_redundant))
2862 if i_non_a_balanced:
2863 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2864 % len(i_non_a_balanced))
2867 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2870 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2874 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2875 """Analyze the post-hooks' result
2877 This method analyses the hook result, handles it, and sends some
2878 nicely-formatted feedback back to the user.
2880 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2881 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2882 @param hooks_results: the results of the multi-node hooks rpc call
2883 @param feedback_fn: function used to send feedback back to the caller
2884 @param lu_result: previous Exec result
2885 @return: the new Exec result, based on the previous result
2889 # We only really run POST phase hooks, only for non-empty groups,
2890 # and are only interested in their results
2891 if not self.my_node_names:
2894 elif phase == constants.HOOKS_PHASE_POST:
2895 # Used to change hooks' output to proper indentation
2896 feedback_fn("* Hooks Results")
2897 assert hooks_results, "invalid result from hooks"
2899 for node_name in hooks_results:
2900 res = hooks_results[node_name]
2902 test = msg and not res.offline
2903 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2904 "Communication failure in hooks execution: %s", msg)
2905 if res.offline or msg:
2906 # No need to investigate payload if node is offline or gave an error.
2907 # override manually lu_result here as _ErrorIf only
2908 # overrides self.bad
2911 for script, hkr, output in res.payload:
2912 test = hkr == constants.HKR_FAIL
2913 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2914 "Script %s failed, output:", script)
2916 output = self._HOOKS_INDENT_RE.sub(" ", output)
2917 feedback_fn("%s" % output)
2923 class LUClusterVerifyDisks(NoHooksLU):
2924 """Verifies the cluster disks status.
2929 def ExpandNames(self):
2930 self.share_locks = _ShareAll()
2931 self.needed_locks = {
2932 locking.LEVEL_NODEGROUP: locking.ALL_SET,
2935 def Exec(self, feedback_fn):
2936 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
2938 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
2939 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
2940 for group in group_names])
2943 class LUGroupVerifyDisks(NoHooksLU):
2944 """Verifies the status of all disks in a node group.
2949 def ExpandNames(self):
2950 # Raises errors.OpPrereqError on its own if group can't be found
2951 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2953 self.share_locks = _ShareAll()
2954 self.needed_locks = {
2955 locking.LEVEL_INSTANCE: [],
2956 locking.LEVEL_NODEGROUP: [],
2957 locking.LEVEL_NODE: [],
2960 def DeclareLocks(self, level):
2961 if level == locking.LEVEL_INSTANCE:
2962 assert not self.needed_locks[locking.LEVEL_INSTANCE]
2964 # Lock instances optimistically, needs verification once node and group
2965 # locks have been acquired
2966 self.needed_locks[locking.LEVEL_INSTANCE] = \
2967 self.cfg.GetNodeGroupInstances(self.group_uuid)
2969 elif level == locking.LEVEL_NODEGROUP:
2970 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
2972 self.needed_locks[locking.LEVEL_NODEGROUP] = \
2973 set([self.group_uuid] +
2974 # Lock all groups used by instances optimistically; this requires
2975 # going via the node before it's locked, requiring verification
2978 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
2979 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
2981 elif level == locking.LEVEL_NODE:
2982 # This will only lock the nodes in the group to be verified which contain
2984 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
2985 self._LockInstancesNodes()
2987 # Lock all nodes in group to be verified
2988 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2989 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
2990 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
2992 def CheckPrereq(self):
2993 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
2994 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
2995 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
2997 assert self.group_uuid in owned_groups
2999 # Check if locked instances are still correct
3000 wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
3001 if owned_instances != wanted_instances:
3002 raise errors.OpPrereqError("Instances in node group %s changed since"
3003 " locks were acquired, wanted %s, have %s;"
3004 " retry the operation" %
3005 (self.op.group_name,
3006 utils.CommaJoin(wanted_instances),
3007 utils.CommaJoin(owned_instances)),
3010 # Get instance information
3011 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3013 # Check if node groups for locked instances are still correct
3014 for (instance_name, inst) in self.instances.items():
3015 assert self.group_uuid in self.cfg.GetInstanceNodeGroups(instance_name), \
3016 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3017 assert owned_nodes.issuperset(inst.all_nodes), \
3018 "Instance %s's nodes changed while we kept the lock" % instance_name
3020 _CheckInstanceNodeGroups(self.cfg, instance_name, owned_groups)
3022 def Exec(self, feedback_fn):
3023 """Verify integrity of cluster disks.
3025 @rtype: tuple of three items
3026 @return: a tuple of (dict of node-to-node_error, list of instances
3027 which need activate-disks, dict of instance: (node, volume) for missing volumes)
3032 res_instances = set()
3035 nv_dict = _MapInstanceDisksToNodes([inst
3036 for inst in self.instances.values()
3040 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3041 set(self.cfg.GetVmCapableNodeList()))
3043 node_lvs = self.rpc.call_lv_list(nodes, [])
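# Walk each node's LV list: record per-node RPC errors and flag instances whose
# logical volumes are present but not online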
3045 for (node, node_res) in node_lvs.items():
3046 if node_res.offline:
3049 msg = node_res.fail_msg
3051 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3052 res_nodes[node] = msg
3055 for lv_name, (_, _, lv_online) in node_res.payload.items():
3056 inst = nv_dict.pop((node, lv_name), None)
3057 if not (lv_online or inst is None):
3058 res_instances.add(inst)
3060 # any leftover items in nv_dict are missing LVs, let's arrange the data
3062 for key, inst in nv_dict.iteritems():
3063 res_missing.setdefault(inst, []).append(key)
3065 return (res_nodes, list(res_instances), res_missing)
3068 class LUClusterRepairDiskSizes(NoHooksLU):
3069 """Verifies the cluster disks sizes.
3074 def ExpandNames(self):
3075 if self.op.instances:
3076 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3077 self.needed_locks = {
3078 locking.LEVEL_NODE: [],
3079 locking.LEVEL_INSTANCE: self.wanted_names,
3081 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3083 self.wanted_names = None
3084 self.needed_locks = {
3085 locking.LEVEL_NODE: locking.ALL_SET,
3086 locking.LEVEL_INSTANCE: locking.ALL_SET,
3088 self.share_locks = _ShareAll()
3090 def DeclareLocks(self, level):
3091 if level == locking.LEVEL_NODE and self.wanted_names is not None:
3092 self._LockInstancesNodes(primary_only=True)
3094 def CheckPrereq(self):
3095 """Check prerequisites.
3097 This only checks the optional instance list against the existing names.
3100 if self.wanted_names is None:
3101 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3103 self.wanted_instances = \
3104 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3106 def _EnsureChildSizes(self, disk):
3107 """Ensure children of the disk have the needed disk size.
3109 This is valid mainly for DRBD8 and fixes an issue where the
3110 children have a smaller disk size.
3112 @param disk: an L{ganeti.objects.Disk} object
3115 if disk.dev_type == constants.LD_DRBD8:
3116 assert disk.children, "Empty children for DRBD8?"
3117 fchild = disk.children[0]
3118 mismatch = fchild.size < disk.size
3120 self.LogInfo("Child disk has size %d, parent %d, fixing",
3121 fchild.size, disk.size)
3122 fchild.size = disk.size
3124 # and we recurse on this child only, not on the metadev
3125 return self._EnsureChildSizes(fchild) or mismatch
3129 def Exec(self, feedback_fn):
3130 """Verify the size of cluster disks.
3133 # TODO: check child disks too
3134 # TODO: check differences in size between primary/secondary nodes
3136 for instance in self.wanted_instances:
3137 pnode = instance.primary_node
3138 if pnode not in per_node_disks:
3139 per_node_disks[pnode] = []
3140 for idx, disk in enumerate(instance.disks):
3141 per_node_disks[pnode].append((instance, idx, disk))
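# Ask every primary node for the actual block device sizes and correct any
# mismatches found in the configuration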
3144 for node, dskl in per_node_disks.items():
3145 newl = [v[2].Copy() for v in dskl]
3147 self.cfg.SetDiskID(dsk, node)
3148 result = self.rpc.call_blockdev_getsize(node, newl)
3150 self.LogWarning("Failure in blockdev_getsize call to node"
3151 " %s, ignoring", node)
3153 if len(result.payload) != len(dskl):
3154 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3155 " result.payload=%s", node, len(dskl), result.payload)
3156 self.LogWarning("Invalid result from node %s, ignoring node results",
3159 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3161 self.LogWarning("Disk %d of instance %s did not return size"
3162 " information, ignoring", idx, instance.name)
3164 if not isinstance(size, (int, long)):
3165 self.LogWarning("Disk %d of instance %s did not return valid"
3166 " size information, ignoring", idx, instance.name)
3169 if size != disk.size:
3170 self.LogInfo("Disk %d of instance %s has mismatched size,"
3171 " correcting: recorded %d, actual %d", idx,
3172 instance.name, disk.size, size)
3174 self.cfg.Update(instance, feedback_fn)
3175 changed.append((instance.name, idx, size))
3176 if self._EnsureChildSizes(disk):
3177 self.cfg.Update(instance, feedback_fn)
3178 changed.append((instance.name, idx, disk.size))
3182 class LUClusterRename(LogicalUnit):
3183 """Rename the cluster.
3186 HPATH = "cluster-rename"
3187 HTYPE = constants.HTYPE_CLUSTER
3189 def BuildHooksEnv(self):
3194 "OP_TARGET": self.cfg.GetClusterName(),
3195 "NEW_NAME": self.op.name,
3198 def BuildHooksNodes(self):
3199 """Build hooks nodes.
3202 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3204 def CheckPrereq(self):
3205 """Verify that the passed name is a valid one.
3208 hostname = netutils.GetHostname(name=self.op.name,
3209 family=self.cfg.GetPrimaryIPFamily())
3211 new_name = hostname.name
3212 self.ip = new_ip = hostname.ip
3213 old_name = self.cfg.GetClusterName()
3214 old_ip = self.cfg.GetMasterIP()
3215 if new_name == old_name and new_ip == old_ip:
3216 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3217 " cluster has changed",
3219 if new_ip != old_ip:
3220 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3221 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3222 " reachable on the network" %
3223 new_ip, errors.ECODE_NOTUNIQUE)
3225 self.op.name = new_name
3227 def Exec(self, feedback_fn):
3228 """Rename the cluster.
3231 clustername = self.op.name
3234 # shutdown the master IP
3235 master = self.cfg.GetMasterNode()
3236 result = self.rpc.call_node_stop_master(master, False)
3237 result.Raise("Could not disable the master role")
3240 cluster = self.cfg.GetClusterInfo()
3241 cluster.cluster_name = clustername
3242 cluster.master_ip = ip
3243 self.cfg.Update(cluster, feedback_fn)
3245 # update the known hosts file
3246 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3247 node_list = self.cfg.GetOnlineNodeList()
3249 node_list.remove(master)
3252 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3254 result = self.rpc.call_node_start_master(master, False, False)
3255 msg = result.fail_msg
3257 self.LogWarning("Could not re-enable the master role on"
3258 " the master, please restart manually: %s", msg)
3263 class LUClusterSetParams(LogicalUnit):
3264 """Change the parameters of the cluster.
3267 HPATH = "cluster-modify"
3268 HTYPE = constants.HTYPE_CLUSTER
3271 def CheckArguments(self):
3275 if self.op.uid_pool:
3276 uidpool.CheckUidPool(self.op.uid_pool)
3278 if self.op.add_uids:
3279 uidpool.CheckUidPool(self.op.add_uids)
3281 if self.op.remove_uids:
3282 uidpool.CheckUidPool(self.op.remove_uids)
3284 def ExpandNames(self):
3285 # FIXME: in the future maybe other cluster params won't require checking on
3286 # all nodes to be modified.
3287 self.needed_locks = {
3288 locking.LEVEL_NODE: locking.ALL_SET,
3290 self.share_locks[locking.LEVEL_NODE] = 1
3292 def BuildHooksEnv(self):
3297 "OP_TARGET": self.cfg.GetClusterName(),
3298 "NEW_VG_NAME": self.op.vg_name,
3301 def BuildHooksNodes(self):
3302 """Build hooks nodes.
3305 mn = self.cfg.GetMasterNode()
3308 def CheckPrereq(self):
3309 """Check prerequisites.
3311 This checks that the given parameters don't conflict and
3312 that the given volume group is valid.
3315 if self.op.vg_name is not None and not self.op.vg_name:
3316 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3317 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3318 " instances exist", errors.ECODE_INVAL)
3320 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3321 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3322 raise errors.OpPrereqError("Cannot disable drbd helper while"
3323 " drbd-based instances exist",
3326 node_list = self.owned_locks(locking.LEVEL_NODE)
3328 # if vg_name not None, checks given volume group on all nodes
3330 vglist = self.rpc.call_vg_list(node_list)
3331 for node in node_list:
3332 msg = vglist[node].fail_msg
3334 # ignoring down node
3335 self.LogWarning("Error while gathering data on node %s"
3336 " (ignoring node): %s", node, msg)
3338 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3340 constants.MIN_VG_SIZE)
3342 raise errors.OpPrereqError("Error on node '%s': %s" %
3343 (node, vgstatus), errors.ECODE_ENVIRON)
3345 if self.op.drbd_helper:
3346 # checks given drbd helper on all nodes
3347 helpers = self.rpc.call_drbd_helper(node_list)
3348 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3350 self.LogInfo("Not checking drbd helper on offline node %s", node)
3352 msg = helpers[node].fail_msg
3354 raise errors.OpPrereqError("Error checking drbd helper on node"
3355 " '%s': %s" % (node, msg),
3356 errors.ECODE_ENVIRON)
3357 node_helper = helpers[node].payload
3358 if node_helper != self.op.drbd_helper:
3359 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3360 (node, node_helper), errors.ECODE_ENVIRON)
3362 self.cluster = cluster = self.cfg.GetClusterInfo()
3363 # validate params changes
3364 if self.op.beparams:
3365 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3366 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3368 if self.op.ndparams:
3369 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3370 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3372 # TODO: we need a more general way to handle resetting
3373 # cluster-level parameters to default values
3374 if self.new_ndparams["oob_program"] == "":
3375 self.new_ndparams["oob_program"] = \
3376 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3378 if self.op.nicparams:
3379 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3380 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3381 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3384 # check all instances for consistency
3385 for instance in self.cfg.GetAllInstancesInfo().values():
3386 for nic_idx, nic in enumerate(instance.nics):
3387 params_copy = copy.deepcopy(nic.nicparams)
3388 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3390 # check parameter syntax
3392 objects.NIC.CheckParameterSyntax(params_filled)
3393 except errors.ConfigurationError, err:
3394 nic_errors.append("Instance %s, nic/%d: %s" %
3395 (instance.name, nic_idx, err))
3397 # if we're moving instances to routed, check that they have an ip
3398 target_mode = params_filled[constants.NIC_MODE]
3399 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3400 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3401 " address" % (instance.name, nic_idx))
3403 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3404 "\n".join(nic_errors))
3406 # hypervisor list/parameters
3407 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3408 if self.op.hvparams:
3409 for hv_name, hv_dict in self.op.hvparams.items():
3410 if hv_name not in self.new_hvparams:
3411 self.new_hvparams[hv_name] = hv_dict
3413 self.new_hvparams[hv_name].update(hv_dict)
3415 # os hypervisor parameters
3416 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3418 for os_name, hvs in self.op.os_hvp.items():
3419 if os_name not in self.new_os_hvp:
3420 self.new_os_hvp[os_name] = hvs
3422 for hv_name, hv_dict in hvs.items():
3423 if hv_name not in self.new_os_hvp[os_name]:
3424 self.new_os_hvp[os_name][hv_name] = hv_dict
3426 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3429 self.new_osp = objects.FillDict(cluster.osparams, {})
3430 if self.op.osparams:
3431 for os_name, osp in self.op.osparams.items():
3432 if os_name not in self.new_osp:
3433 self.new_osp[os_name] = {}
3435 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3438 if not self.new_osp[os_name]:
3439 # we removed all parameters
3440 del self.new_osp[os_name]
3442 # check the parameter validity (remote check)
3443 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3444 os_name, self.new_osp[os_name])
3446 # changes to the hypervisor list
3447 if self.op.enabled_hypervisors is not None:
3448 self.hv_list = self.op.enabled_hypervisors
3449 for hv in self.hv_list:
3450 # if the hypervisor doesn't already exist in the cluster
3451 # hvparams, we initialize it to empty, and then (in both
3452 # cases) we make sure to fill the defaults, as we might not
3453 # have a complete defaults list if the hypervisor wasn't
3455 if hv not in new_hvp:
3457 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3458 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3460 self.hv_list = cluster.enabled_hypervisors
3462 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3463 # either the enabled list has changed, or the parameters have, validate
3464 for hv_name, hv_params in self.new_hvparams.items():
3465 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3466 (self.op.enabled_hypervisors and
3467 hv_name in self.op.enabled_hypervisors)):
3468 # either this is a new hypervisor, or its parameters have changed
3469 hv_class = hypervisor.GetHypervisor(hv_name)
3470 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3471 hv_class.CheckParameterSyntax(hv_params)
3472 _CheckHVParams(self, node_list, hv_name, hv_params)
3475 # no need to check any newly-enabled hypervisors, since the
3476 # defaults have already been checked in the above code-block
3477 for os_name, os_hvp in self.new_os_hvp.items():
3478 for hv_name, hv_params in os_hvp.items():
3479 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3480 # we need to fill in the new os_hvp on top of the actual hv_p
3481 cluster_defaults = self.new_hvparams.get(hv_name, {})
3482 new_osp = objects.FillDict(cluster_defaults, hv_params)
3483 hv_class = hypervisor.GetHypervisor(hv_name)
3484 hv_class.CheckParameterSyntax(new_osp)
3485 _CheckHVParams(self, node_list, hv_name, new_osp)
3487 if self.op.default_iallocator:
3488 alloc_script = utils.FindFile(self.op.default_iallocator,
3489 constants.IALLOCATOR_SEARCH_PATH,
3491 if alloc_script is None:
3492 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3493 " specified" % self.op.default_iallocator,
3496 def Exec(self, feedback_fn):
3497 """Change the parameters of the cluster.
3500 if self.op.vg_name is not None:
3501 new_volume = self.op.vg_name
3504 if new_volume != self.cfg.GetVGName():
3505 self.cfg.SetVGName(new_volume)
3507 feedback_fn("Cluster LVM configuration already in desired"
3508 " state, not changing")
3509 if self.op.drbd_helper is not None:
3510 new_helper = self.op.drbd_helper
3513 if new_helper != self.cfg.GetDRBDHelper():
3514 self.cfg.SetDRBDHelper(new_helper)
3516 feedback_fn("Cluster DRBD helper already in desired state,"
3518 if self.op.hvparams:
3519 self.cluster.hvparams = self.new_hvparams
3521 self.cluster.os_hvp = self.new_os_hvp
3522 if self.op.enabled_hypervisors is not None:
3523 self.cluster.hvparams = self.new_hvparams
3524 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3525 if self.op.beparams:
3526 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3527 if self.op.nicparams:
3528 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3529 if self.op.osparams:
3530 self.cluster.osparams = self.new_osp
3531 if self.op.ndparams:
3532 self.cluster.ndparams = self.new_ndparams
3534 if self.op.candidate_pool_size is not None:
3535 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3536 # we need to update the pool size here, otherwise the save will fail
3537 _AdjustCandidatePool(self, [])
3539 if self.op.maintain_node_health is not None:
3540 self.cluster.maintain_node_health = self.op.maintain_node_health
3542 if self.op.prealloc_wipe_disks is not None:
3543 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3545 if self.op.add_uids is not None:
3546 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3548 if self.op.remove_uids is not None:
3549 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3551 if self.op.uid_pool is not None:
3552 self.cluster.uid_pool = self.op.uid_pool
3554 if self.op.default_iallocator is not None:
3555 self.cluster.default_iallocator = self.op.default_iallocator
3557 if self.op.reserved_lvs is not None:
3558 self.cluster.reserved_lvs = self.op.reserved_lvs
3560 def helper_os(aname, mods, desc):
3562 lst = getattr(self.cluster, aname)
3563 for key, val in mods:
3564 if key == constants.DDM_ADD:
3566 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3569 elif key == constants.DDM_REMOVE:
3573 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3575 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3577 if self.op.hidden_os:
3578 helper_os("hidden_os", self.op.hidden_os, "hidden")
3580 if self.op.blacklisted_os:
3581 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3583 if self.op.master_netdev:
3584 master = self.cfg.GetMasterNode()
3585 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3586 self.cluster.master_netdev)
3587 result = self.rpc.call_node_stop_master(master, False)
3588 result.Raise("Could not disable the master ip")
3589 feedback_fn("Changing master_netdev from %s to %s" %
3590 (self.cluster.master_netdev, self.op.master_netdev))
3591 self.cluster.master_netdev = self.op.master_netdev
3593 self.cfg.Update(self.cluster, feedback_fn)
3595 if self.op.master_netdev:
3596 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3597 self.op.master_netdev)
3598 result = self.rpc.call_node_start_master(master, False, False)
3600 self.LogWarning("Could not re-enable the master ip on"
3601 " the master, please restart manually: %s",
3605 def _UploadHelper(lu, nodes, fname):
3606 """Helper for uploading a file and showing warnings.
3609 if os.path.exists(fname):
3610 result = lu.rpc.call_upload_file(nodes, fname)
3611 for to_node, to_result in result.items():
3612 msg = to_result.fail_msg
3614 msg = ("Copy of file %s to node %s failed: %s" %
3615 (fname, to_node, msg))
3616 lu.proc.LogWarning(msg)
3619 def _ComputeAncillaryFiles(cluster, redist):
3620 """Compute files external to Ganeti which need to be consistent.
3622 @type redist: boolean
3623 @param redist: Whether to include files which need to be redistributed
3626 # Compute files for all nodes
3628 constants.SSH_KNOWN_HOSTS_FILE,
3629 constants.CONFD_HMAC_KEY,
3630 constants.CLUSTER_DOMAIN_SECRET_FILE,
3634 files_all.update(constants.ALL_CERT_FILES)
3635 files_all.update(ssconf.SimpleStore().GetFileList())
3637 if cluster.modify_etc_hosts:
3638 files_all.add(constants.ETC_HOSTS)
3640 # Files which must either exist on all nodes or on none
3641 files_all_opt = set([
3642 constants.RAPI_USERS_FILE,
3645 # Files which should only be on master candidates
3648 files_mc.add(constants.CLUSTER_CONF_FILE)
3650 # Files which should only be on VM-capable nodes
3651 files_vm = set(filename
3652 for hv_name in cluster.enabled_hypervisors
3653 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3655 # Filenames must be unique
3656 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3657 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3658 "Found file listed in more than one file list"
3660 return (files_all, files_all_opt, files_mc, files_vm)
3663 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3664 """Distribute additional files which are part of the cluster configuration.
3666 ConfigWriter takes care of distributing the config and ssconf files, but
3667 there are more files which should be distributed to all nodes. This function
3668 makes sure those are copied.
3670 @param lu: calling logical unit
3671 @param additional_nodes: list of nodes not in the config to distribute to
3672 @type additional_vm: boolean
3673 @param additional_vm: whether the additional nodes are vm-capable or not
3676 # Gather target nodes
3677 cluster = lu.cfg.GetClusterInfo()
3678 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3680 online_nodes = lu.cfg.GetOnlineNodeList()
3681 vm_nodes = lu.cfg.GetVmCapableNodeList()
3683 if additional_nodes is not None:
3684 online_nodes.extend(additional_nodes)
3686 vm_nodes.extend(additional_nodes)
3688 # Never distribute to master node
3689 for nodelist in [online_nodes, vm_nodes]:
3690 if master_info.name in nodelist:
3691 nodelist.remove(master_info.name)
3694 (files_all, files_all_opt, files_mc, files_vm) = \
3695 _ComputeAncillaryFiles(cluster, True)
3697 # Never re-distribute configuration file from here
3698 assert not (constants.CLUSTER_CONF_FILE in files_all or
3699 constants.CLUSTER_CONF_FILE in files_vm)
3700 assert not files_mc, "Master candidates not handled in this function"
3702 filemap = [
3703 (online_nodes, files_all),
3704 (online_nodes, files_all_opt),
3705 (vm_nodes, files_vm),
3706 ]
3708 # Upload the files
3709 for (node_list, files) in filemap:
3710 for fname in files:
3711 _UploadHelper(lu, node_list, fname)
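# Illustrative sketch (not part of the original module): the redistribution
# above pairs node lists with file sets and uploads every file to every node
# in the matching list, after the master has been dropped from the targets.
# A simplified stand-alone version; all names here are hypothetical:
def _example_build_filemap(online_nodes, vm_nodes, master_name,
                           files_all, files_all_opt, files_vm):
  # Never distribute to the master node itself
  online = [name for name in online_nodes if name != master_name]
  vm = [name for name in vm_nodes if name != master_name]
  return [
    (online, files_all),
    (online, files_all_opt),
    (vm, files_vm),
    ]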
3714 class LUClusterRedistConf(NoHooksLU):
3715 """Force the redistribution of cluster configuration.
3717 This is a very simple LU.
3722 def ExpandNames(self):
3723 self.needed_locks = {
3724 locking.LEVEL_NODE: locking.ALL_SET,
3726 self.share_locks[locking.LEVEL_NODE] = 1
3728 def Exec(self, feedback_fn):
3729 """Redistribute the configuration.
3732 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3733 _RedistributeAncillaryFiles(self)
3736 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3737 """Sleep and poll for an instance's disk to sync.
3740 if not instance.disks or disks is not None and not disks:
3743 disks = _ExpandCheckDisks(instance, disks)
3746 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3748 node = instance.primary_node
3751 lu.cfg.SetDiskID(dev, node)
3753 # TODO: Convert to utils.Retry
3756 degr_retries = 10 # in seconds, as we sleep 1 second each time
3760 cumul_degraded = False
3761 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3762 msg = rstats.fail_msg
3764 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3767 raise errors.RemoteError("Can't contact node %s for mirror data,"
3768 " aborting." % node)
3771 rstats = rstats.payload
3773 for i, mstat in enumerate(rstats):
3775 lu.LogWarning("Can't compute data for node %s/%s",
3776 node, disks[i].iv_name)
3779 cumul_degraded = (cumul_degraded or
3780 (mstat.is_degraded and mstat.sync_percent is None))
3781 if mstat.sync_percent is not None:
3783 if mstat.estimated_time is not None:
3784 rem_time = ("%s remaining (estimated)" %
3785 utils.FormatSeconds(mstat.estimated_time))
3786 max_time = mstat.estimated_time
3788 rem_time = "no time estimate"
3789 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3790 (disks[i].iv_name, mstat.sync_percent, rem_time))
3792 # if we're done but degraded, let's do a few small retries, to
3793 # make sure we see a stable and not transient situation; therefore
3794 # we force restart of the loop
3795 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3796 logging.info("Degraded disks found, %d retries left", degr_retries)
3804 time.sleep(min(60, max_time))
3807 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3808 return not cumul_degraded
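# Illustrative sketch (not part of the original module): _WaitForSync polls
# the mirror status and, when the sync looks finished but still degraded,
# burns a small retry budget before reporting the result.  A generic
# stand-alone skeleton of that pattern; poll_fn and the timings are
# hypothetical, caller-supplied pieces:
def _example_poll_until_clean(poll_fn, degr_retries=10, max_wait=60):
  import time
  while True:
    (done, degraded, eta) = poll_fn()  # caller-supplied status probe
    if done and degraded and degr_retries > 0:
      # Finished but degraded: retry a few times to rule out a transient state
      degr_retries -= 1
      time.sleep(1)
      continue
    if done:
      return not degraded
    time.sleep(min(max_wait, eta or 1))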
3811 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3812 """Check that mirrors are not degraded.
3814 The ldisk parameter, if True, will change the test from the
3815 is_degraded attribute (which represents overall non-ok status for
3816 the device(s)) to the ldisk (representing the local storage status).
3819 lu.cfg.SetDiskID(dev, node)
3823 if on_primary or dev.AssembleOnSecondary():
3824 rstats = lu.rpc.call_blockdev_find(node, dev)
3825 msg = rstats.fail_msg
3827 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3829 elif not rstats.payload:
3830 lu.LogWarning("Can't find disk on node %s", node)
3834 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3836 result = result and not rstats.payload.is_degraded
3839 for child in dev.children:
3840 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
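# Illustrative sketch (not part of the original module): _CheckDiskConsistency
# recurses into dev.children so that a mirrored device is only reported as
# consistent when every component device is.  A plain-Python analogue over a
# hypothetical nested dict structure:
def _example_tree_consistent(dev):
  # dev is assumed to look like {"ok": bool, "children": [dev, ...]}
  result = dev.get("ok", False)
  for child in dev.get("children", []):
    result = result and _example_tree_consistent(child)
  return result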
3845 class LUOobCommand(NoHooksLU):
3846 """Logical unit for OOB handling.
3850 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3852 def ExpandNames(self):
3853 """Gather locks we need.
3856 if self.op.node_names:
3857 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3858 lock_names = self.op.node_names
3860 lock_names = locking.ALL_SET
3862 self.needed_locks = {
3863 locking.LEVEL_NODE: lock_names,
3866 def CheckPrereq(self):
3867 """Check prerequisites.
3870 - the node exists in the configuration
3873 Any errors are signaled by raising errors.OpPrereqError.
3877 self.master_node = self.cfg.GetMasterNode()
3879 assert self.op.power_delay >= 0.0
3881 if self.op.node_names:
3882 if (self.op.command in self._SKIP_MASTER and
3883 self.master_node in self.op.node_names):
3884 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3885 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3887 if master_oob_handler:
3888 additional_text = ("run '%s %s %s' if you want to operate on the"
3889 " master regardless") % (master_oob_handler,
3893 additional_text = "it does not support out-of-band operations"
3895 raise errors.OpPrereqError(("Operating on the master node %s is not"
3896 " allowed for %s; %s") %
3897 (self.master_node, self.op.command,
3898 additional_text), errors.ECODE_INVAL)
3900 self.op.node_names = self.cfg.GetNodeList()
3901 if self.op.command in self._SKIP_MASTER:
3902 self.op.node_names.remove(self.master_node)
3904 if self.op.command in self._SKIP_MASTER:
3905 assert self.master_node not in self.op.node_names
3907 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
3909 raise errors.OpPrereqError("Node %s not found" % node_name,
3912 self.nodes.append(node)
3914 if (not self.op.ignore_status and
3915 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3916 raise errors.OpPrereqError(("Cannot power off node %s because it is"
3917 " not marked offline") % node_name,
3920 def Exec(self, feedback_fn):
3921 """Execute OOB and return result if we expect any.
3924 master_node = self.master_node
3927 for idx, node in enumerate(utils.NiceSort(self.nodes,
3928 key=lambda node: node.name)):
3929 node_entry = [(constants.RS_NORMAL, node.name)]
3930 ret.append(node_entry)
3932 oob_program = _SupportsOob(self.cfg, node)
3935 node_entry.append((constants.RS_UNAVAIL, None))
3938 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3939 self.op.command, oob_program, node.name)
3940 result = self.rpc.call_run_oob(master_node, oob_program,
3941 self.op.command, node.name,
3945 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3946 node.name, result.fail_msg)
3947 node_entry.append((constants.RS_NODATA, None))
3950 self._CheckPayload(result)
3951 except errors.OpExecError, err:
3952 self.LogWarning("Payload returned by node '%s' is not valid: %s",
3954 node_entry.append((constants.RS_NODATA, None))
3956 if self.op.command == constants.OOB_HEALTH:
3957 # For health we should log important events
3958 for item, status in result.payload:
3959 if status in [constants.OOB_STATUS_WARNING,
3960 constants.OOB_STATUS_CRITICAL]:
3961 self.LogWarning("Item '%s' on node '%s' has status '%s'",
3962 item, node.name, status)
3964 if self.op.command == constants.OOB_POWER_ON:
3965 node.powered = True
3966 elif self.op.command == constants.OOB_POWER_OFF:
3967 node.powered = False
3968 elif self.op.command == constants.OOB_POWER_STATUS:
3969 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3970 if powered != node.powered:
3971 logging.warning(("Recorded power state (%s) of node '%s' does not"
3972 " match actual power state (%s)"), node.powered,
3975 # For configuration changing commands we should update the node
3976 if self.op.command in (constants.OOB_POWER_ON,
3977 constants.OOB_POWER_OFF):
3978 self.cfg.Update(node, feedback_fn)
3980 node_entry.append((constants.RS_NORMAL, result.payload))
3982 if (self.op.command == constants.OOB_POWER_ON and
3983 idx < len(self.nodes) - 1):
3984 time.sleep(self.op.power_delay)
3988 def _CheckPayload(self, result):
3989 """Checks if the payload is valid.
3991 @param result: RPC result
3992 @raises errors.OpExecError: If payload is not valid
3996 if self.op.command == constants.OOB_HEALTH:
3997 if not isinstance(result.payload, list):
3998 errs.append("command 'health' is expected to return a list but got %s" %
3999 type(result.payload))
4001 for item, status in result.payload:
4002 if status not in constants.OOB_STATUSES:
4003 errs.append("health item '%s' has invalid status '%s'" %
4004 (item, status))
4006 if self.op.command == constants.OOB_POWER_STATUS:
4007 if not isinstance(result.payload, dict):
4008 errs.append("power-status is expected to return a dict but got %s" %
4009 type(result.payload))
4011 if self.op.command in [
4012 constants.OOB_POWER_ON,
4013 constants.OOB_POWER_OFF,
4014 constants.OOB_POWER_CYCLE,
4016 if result.payload is not None:
4017 errs.append("%s is expected to not return payload but got '%s'" %
4018 (self.op.command, result.payload))
4021 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4022 utils.CommaJoin(errs))
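# Illustrative sketch (not part of the original module): _CheckPayload expects
# a different payload shape per out-of-band command (a list for the health
# command, a dict for power-status, no payload at all for the power commands).
# A stand-alone version of that dispatch; the command strings used here are
# hypothetical placeholders rather than the real constants:
def _example_check_oob_payload(command, payload):
  errs = []
  if command == "health" and not isinstance(payload, list):
    errs.append("health is expected to return a list but got %s" %
                type(payload))
  elif command == "power-status" and not isinstance(payload, dict):
    errs.append("power-status is expected to return a dict but got %s" %
                type(payload))
  elif (command in ("power-on", "power-off", "power-cycle") and
        payload is not None):
    errs.append("%s is expected to not return payload but got '%s'" %
                (command, payload))
  return errs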
4024 class _OsQuery(_QueryBase):
4025 FIELDS = query.OS_FIELDS
4027 def ExpandNames(self, lu):
4028 # Lock all nodes in shared mode
4029 # Temporary removal of locks, should be reverted later
4030 # TODO: reintroduce locks when they are lighter-weight
4031 lu.needed_locks = {}
4032 #self.share_locks[locking.LEVEL_NODE] = 1
4033 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4035 # The following variables interact with _QueryBase._GetNames
4036 if self.names:
4037 self.wanted = self.names
4038 else:
4039 self.wanted = locking.ALL_SET
4041 self.do_locking = self.use_locking
4043 def DeclareLocks(self, lu, level):
4047 def _DiagnoseByOS(rlist):
4048 """Remaps a per-node return list into an a per-os per-node dictionary
4050 @param rlist: a map with node names as keys and OS objects as values
4053 @return: a dictionary with osnames as keys and as value another
4054 map, with nodes as keys and tuples of (path, status, diagnose,
4055 variants, parameters, api_versions) as values, eg::
4057 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4058 (/srv/..., False, "invalid api")],
4059 "node2": [(/srv/..., True, "", [], [])]}
4064 # we build here the list of nodes that didn't fail the RPC (at RPC
4065 # level), so that nodes with a non-responding node daemon don't
4066 # make all OSes invalid
4067 good_nodes = [node_name for node_name in rlist
4068 if not rlist[node_name].fail_msg]
4069 for node_name, nr in rlist.items():
4070 if nr.fail_msg or not nr.payload:
4072 for (name, path, status, diagnose, variants,
4073 params, api_versions) in nr.payload:
4074 if name not in all_os:
4075 # build a list of nodes for this os containing empty lists
4076 # for each node in node_list
4077 all_os[name] = {}
4078 for nname in good_nodes:
4079 all_os[name][nname] = []
4080 # convert params from [name, help] to (name, help)
4081 params = [tuple(v) for v in params]
4082 all_os[name][node_name].append((path, status, diagnose,
4083 variants, params, api_versions))
4084 return all_os
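# Illustrative sketch (not part of the original module): the remapping above
# turns {node: [OS entries]} into {os_name: {node: [entries]}}, pre-seeding an
# empty list for every reachable node so that a missing OS shows up as an
# explicit empty entry.  A simplified stand-alone version over plain dicts;
# the data layout used here is hypothetical:
def _example_remap_by_os(per_node):
  good_nodes = list(per_node.keys())
  per_os = {}
  for node, entries in per_node.items():
    for (name, info) in entries:
      if name not in per_os:
        # Seed every known node with an empty list for this OS
        per_os[name] = dict((n, []) for n in good_nodes)
      per_os[name][node].append(info)
  return per_os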
4086 def _GetQueryData(self, lu):
4087 """Computes the list of nodes and their attributes.
4090 # Locking is not used
4091 assert not (compat.any(lu.glm.is_owned(level)
4092 for level in locking.LEVELS
4093 if level != locking.LEVEL_CLUSTER) or
4094 self.do_locking or self.use_locking)
4096 valid_nodes = [node.name
4097 for node in lu.cfg.GetAllNodesInfo().values()
4098 if not node.offline and node.vm_capable]
4099 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4100 cluster = lu.cfg.GetClusterInfo()
4104 for (os_name, os_data) in pol.items():
4105 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4106 hidden=(os_name in cluster.hidden_os),
4107 blacklisted=(os_name in cluster.blacklisted_os))
4111 api_versions = set()
4113 for idx, osl in enumerate(os_data.values()):
4114 info.valid = bool(info.valid and osl and osl[0][1])
4118 (node_variants, node_params, node_api) = osl[0][3:6]
4119 if idx == 0:
4120 # First entry
4121 variants.update(node_variants)
4122 parameters.update(node_params)
4123 api_versions.update(node_api)
4124 else:
4125 # Filter out inconsistent values
4126 variants.intersection_update(node_variants)
4127 parameters.intersection_update(node_params)
4128 api_versions.intersection_update(node_api)
4130 info.variants = list(variants)
4131 info.parameters = list(parameters)
4132 info.api_versions = list(api_versions)
4134 data[os_name] = info
4136 # Prepare data in requested order
4137 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4138 if name in data]
4141 class LUOsDiagnose(NoHooksLU):
4142 """Logical unit for OS diagnose/query.
4148 def _BuildFilter(fields, names):
4149 """Builds a filter for querying OSes.
4152 name_filter = qlang.MakeSimpleFilter("name", names)
4154 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4155 # respective field is not requested
4156 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4157 for fname in ["hidden", "blacklisted"]
4158 if fname not in fields]
4159 if "valid" not in fields:
4160 status_filter.append([qlang.OP_TRUE, "valid"])
4162 if status_filter:
4163 status_filter.insert(0, qlang.OP_AND)
4164 else:
4165 status_filter = None
4167 if name_filter and status_filter:
4168 return [qlang.OP_AND, name_filter, status_filter]
4169 elif name_filter:
4170 return name_filter
4171 else:
4172 return status_filter
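# Illustrative sketch (not part of the original module): _BuildFilter combines
# an optional name filter with an optional status filter, AND-ing them only
# when both are present.  The same decision table in stand-alone form; the
# "&" operator spelling is a hypothetical placeholder for the real qlang
# constant:
def _example_combine_filters(name_filter, status_filter):
  if name_filter and status_filter:
    return ["&", name_filter, status_filter]
  elif name_filter:
    return name_filter
  else:
    return status_filter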
4174 def CheckArguments(self):
4175 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4176 self.op.output_fields, False)
4178 def ExpandNames(self):
4179 self.oq.ExpandNames(self)
4181 def Exec(self, feedback_fn):
4182 return self.oq.OldStyleQuery(self)
4185 class LUNodeRemove(LogicalUnit):
4186 """Logical unit for removing a node.
4189 HPATH = "node-remove"
4190 HTYPE = constants.HTYPE_NODE
4192 def BuildHooksEnv(self):
4195 This doesn't run on the target node in the pre phase as a failed
4196 node would then be impossible to remove.
4200 "OP_TARGET": self.op.node_name,
4201 "NODE_NAME": self.op.node_name,
4204 def BuildHooksNodes(self):
4205 """Build hooks nodes.
4208 all_nodes = self.cfg.GetNodeList()
4209 try:
4210 all_nodes.remove(self.op.node_name)
4211 except ValueError:
4212 logging.warning("Node '%s', which is about to be removed, was not found"
4213 " in the list of all nodes", self.op.node_name)
4214 return (all_nodes, all_nodes)
4216 def CheckPrereq(self):
4217 """Check prerequisites.
4220 - the node exists in the configuration
4221 - it does not have primary or secondary instances
4222 - it's not the master
4224 Any errors are signaled by raising errors.OpPrereqError.
4227 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4228 node = self.cfg.GetNodeInfo(self.op.node_name)
4229 assert node is not None
4231 masternode = self.cfg.GetMasterNode()
4232 if node.name == masternode:
4233 raise errors.OpPrereqError("Node is the master node, failover to another"
4234 " node is required", errors.ECODE_INVAL)
4236 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4237 if node.name in instance.all_nodes:
4238 raise errors.OpPrereqError("Instance %s is still running on the node,"
4239 " please remove first" % instance_name,
4241 self.op.node_name = node.name
4244 def Exec(self, feedback_fn):
4245 """Removes the node from the cluster.
4249 logging.info("Stopping the node daemon and removing configs from node %s",
4252 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4254 # Promote nodes to master candidate as needed
4255 _AdjustCandidatePool(self, exceptions=[node.name])
4256 self.context.RemoveNode(node.name)
4258 # Run post hooks on the node before it's removed
4259 _RunPostHook(self, node.name)
4261 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4262 msg = result.fail_msg
4264 self.LogWarning("Errors encountered on the remote node while leaving"
4265 " the cluster: %s", msg)
4267 # Remove node from our /etc/hosts
4268 if self.cfg.GetClusterInfo().modify_etc_hosts:
4269 master_node = self.cfg.GetMasterNode()
4270 result = self.rpc.call_etc_hosts_modify(master_node,
4271 constants.ETC_HOSTS_REMOVE,
4273 result.Raise("Can't update hosts file with new host data")
4274 _RedistributeAncillaryFiles(self)
4277 class _NodeQuery(_QueryBase):
4278 FIELDS = query.NODE_FIELDS
4280 def ExpandNames(self, lu):
4281 lu.needed_locks = {}
4282 lu.share_locks[locking.LEVEL_NODE] = 1
4284 if self.names:
4285 self.wanted = _GetWantedNodes(lu, self.names)
4286 else:
4287 self.wanted = locking.ALL_SET
4289 self.do_locking = (self.use_locking and
4290 query.NQ_LIVE in self.requested_data)
4292 if self.do_locking:
4293 # if we don't request only static fields, we need to lock the nodes
4294 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4296 def DeclareLocks(self, lu, level):
4299 def _GetQueryData(self, lu):
4300 """Computes the list of nodes and their attributes.
4303 all_info = lu.cfg.GetAllNodesInfo()
4305 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4307 # Gather data as requested
4308 if query.NQ_LIVE in self.requested_data:
4309 # filter out non-vm_capable nodes
4310 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4312 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4313 lu.cfg.GetHypervisorType())
4314 live_data = dict((name, nresult.payload)
4315 for (name, nresult) in node_data.items()
4316 if not nresult.fail_msg and nresult.payload)
4320 if query.NQ_INST in self.requested_data:
4321 node_to_primary = dict([(name, set()) for name in nodenames])
4322 node_to_secondary = dict([(name, set()) for name in nodenames])
4324 inst_data = lu.cfg.GetAllInstancesInfo()
4326 for inst in inst_data.values():
4327 if inst.primary_node in node_to_primary:
4328 node_to_primary[inst.primary_node].add(inst.name)
4329 for secnode in inst.secondary_nodes:
4330 if secnode in node_to_secondary:
4331 node_to_secondary[secnode].add(inst.name)
4333 node_to_primary = None
4334 node_to_secondary = None
4336 if query.NQ_OOB in self.requested_data:
4337 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4338 for name, node in all_info.iteritems())
4342 if query.NQ_GROUP in self.requested_data:
4343 groups = lu.cfg.GetAllNodeGroupsInfo()
4347 return query.NodeQueryData([all_info[name] for name in nodenames],
4348 live_data, lu.cfg.GetMasterNode(),
4349 node_to_primary, node_to_secondary, groups,
4350 oob_support, lu.cfg.GetClusterInfo())
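# Illustrative sketch (not part of the original module): the NQ_INST branch
# above inverts the instance list into per-node sets of primary and secondary
# instances.  A stand-alone version over simple objects; the attribute names
# mirror the ones used above, but the input type is hypothetical:
def _example_invert_instances(nodenames, instances):
  node_to_primary = dict((name, set()) for name in nodenames)
  node_to_secondary = dict((name, set()) for name in nodenames)
  for inst in instances:
    if inst.primary_node in node_to_primary:
      node_to_primary[inst.primary_node].add(inst.name)
    for secnode in inst.secondary_nodes:
      if secnode in node_to_secondary:
        node_to_secondary[secnode].add(inst.name)
  return (node_to_primary, node_to_secondary)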
4353 class LUNodeQuery(NoHooksLU):
4354 """Logical unit for querying nodes.
4357 # pylint: disable-msg=W0142
4360 def CheckArguments(self):
4361 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4362 self.op.output_fields, self.op.use_locking)
4364 def ExpandNames(self):
4365 self.nq.ExpandNames(self)
4367 def Exec(self, feedback_fn):
4368 return self.nq.OldStyleQuery(self)
4371 class LUNodeQueryvols(NoHooksLU):
4372 """Logical unit for getting volumes on node(s).
4376 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4377 _FIELDS_STATIC = utils.FieldSet("node")
4379 def CheckArguments(self):
4380 _CheckOutputFields(static=self._FIELDS_STATIC,
4381 dynamic=self._FIELDS_DYNAMIC,
4382 selected=self.op.output_fields)
4384 def ExpandNames(self):
4385 self.needed_locks = {}
4386 self.share_locks[locking.LEVEL_NODE] = 1
4387 if not self.op.nodes:
4388 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4390 self.needed_locks[locking.LEVEL_NODE] = \
4391 _GetWantedNodes(self, self.op.nodes)
4393 def Exec(self, feedback_fn):
4394 """Computes the list of nodes and their attributes.
4397 nodenames = self.owned_locks(locking.LEVEL_NODE)
4398 volumes = self.rpc.call_node_volumes(nodenames)
4400 ilist = self.cfg.GetAllInstancesInfo()
4401 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4403 output = []
4404 for node in nodenames:
4405 nresult = volumes[node]
4408 msg = nresult.fail_msg
4410 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4413 node_vols = sorted(nresult.payload,
4414 key=operator.itemgetter("dev"))
4416 for vol in node_vols:
4417 node_output = []
4418 for field in self.op.output_fields:
4419 if field == "node":
4420 val = node
4421 elif field == "phys":
4422 val = vol["dev"]
4423 elif field == "vg":
4424 val = vol["vg"]
4425 elif field == "name":
4426 val = vol["name"]
4427 elif field == "size":
4428 val = int(float(vol["size"]))
4429 elif field == "instance":
4430 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4431 else:
4432 raise errors.ParameterError(field)
4433 node_output.append(str(val))
4435 output.append(node_output)
4437 return output
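# Illustrative sketch (not part of the original module): the loop above builds
# one output row per volume by dispatching on the requested field names.  A
# compact stand-alone variant using a lookup table instead of an if/elif
# chain; the field names and volume keys are hypothetical:
def _example_volume_row(node, vol, output_fields):
  getters = {
    "node": lambda: node,
    "phys": lambda: vol["dev"],
    "vg": lambda: vol["vg"],
    "name": lambda: vol["name"],
    "size": lambda: int(float(vol["size"])),
    }
  row = []
  for field in output_fields:
    if field not in getters:
      raise ValueError("Unknown field: %s" % field)
    row.append(str(getters[field]()))
  return row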
4440 class LUNodeQueryStorage(NoHooksLU):
4441 """Logical unit for getting information on storage units on node(s).
4444 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4447 def CheckArguments(self):
4448 _CheckOutputFields(static=self._FIELDS_STATIC,
4449 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4450 selected=self.op.output_fields)
4452 def ExpandNames(self):
4453 self.needed_locks = {}
4454 self.share_locks[locking.LEVEL_NODE] = 1
4456 if self.op.nodes:
4457 self.needed_locks[locking.LEVEL_NODE] = \
4458 _GetWantedNodes(self, self.op.nodes)
4459 else:
4460 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4462 def Exec(self, feedback_fn):
4463 """Computes the list of nodes and their attributes.
4466 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4468 # Always get name to sort by
4469 if constants.SF_NAME in self.op.output_fields:
4470 fields = self.op.output_fields[:]
4472 fields = [constants.SF_NAME] + self.op.output_fields
4474 # Never ask for node or type as it's only known to the LU
4475 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4476 while extra in fields:
4477 fields.remove(extra)
4479 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4480 name_idx = field_idx[constants.SF_NAME]
4482 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4483 data = self.rpc.call_storage_list(self.nodes,
4484 self.op.storage_type, st_args,
4485 self.op.name, fields)
4487 result = []
4489 for node in utils.NiceSort(self.nodes):
4490 nresult = data[node]
4494 msg = nresult.fail_msg
4496 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4499 rows = dict([(row[name_idx], row) for row in nresult.payload])
4501 for name in utils.NiceSort(rows.keys()):
4502 row = rows[name]
4504 out = []
4506 for field in self.op.output_fields:
4507 if field == constants.SF_NODE:
4508 val = node
4509 elif field == constants.SF_TYPE:
4510 val = self.op.storage_type
4511 elif field in field_idx:
4512 val = row[field_idx[field]]
4513 else:
4514 raise errors.ParameterError(field)
4515 out.append(val)
4517 result.append(out)
4519 return result
4523 class _InstanceQuery(_QueryBase):
4524 FIELDS = query.INSTANCE_FIELDS
4526 def ExpandNames(self, lu):
4527 lu.needed_locks = {}
4528 lu.share_locks = _ShareAll()
4530 if self.names:
4531 self.wanted = _GetWantedInstances(lu, self.names)
4532 else:
4533 self.wanted = locking.ALL_SET
4535 self.do_locking = (self.use_locking and
4536 query.IQ_LIVE in self.requested_data)
4537 if self.do_locking:
4538 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4539 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4540 lu.needed_locks[locking.LEVEL_NODE] = []
4541 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4543 self.do_grouplocks = (self.do_locking and
4544 query.IQ_NODES in self.requested_data)
4546 def DeclareLocks(self, lu, level):
4548 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4549 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4551 # Lock all groups used by instances optimistically; this requires going
4552 # via the node before it's locked, requiring verification later on
4553 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4554 set(group_uuid
4555 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4556 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4557 elif level == locking.LEVEL_NODE:
4558 lu._LockInstancesNodes() # pylint: disable-msg=W0212
4561 def _CheckGroupLocks(lu):
4562 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4563 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4565 # Check if node groups for locked instances are still correct
4566 for instance_name in owned_instances:
4567 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4569 def _GetQueryData(self, lu):
4570 """Computes the list of instances and their attributes.
4573 if self.do_grouplocks:
4574 self._CheckGroupLocks(lu)
4576 cluster = lu.cfg.GetClusterInfo()
4577 all_info = lu.cfg.GetAllInstancesInfo()
4579 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4581 instance_list = [all_info[name] for name in instance_names]
4582 nodes = frozenset(itertools.chain(*(inst.all_nodes
4583 for inst in instance_list)))
4584 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4587 wrongnode_inst = set()
4589 # Gather data as requested
4590 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4592 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4594 result = node_data[name]
4596 # offline nodes will be in both lists
4597 assert result.fail_msg
4598 offline_nodes.append(name)
4600 bad_nodes.append(name)
4601 elif result.payload:
4602 for inst in result.payload:
4603 if inst in all_info:
4604 if all_info[inst].primary_node == name:
4605 live_data.update(result.payload)
4607 wrongnode_inst.add(inst)
4609 # orphan instance; we don't list it here as we don't
4610 # handle this case yet in the output of instance listing
4611 logging.warning("Orphan instance '%s' found on node %s",
4613 # else no instance is alive
4617 if query.IQ_DISKUSAGE in self.requested_data:
4618 disk_usage = dict((inst.name,
4619 _ComputeDiskSize(inst.disk_template,
4620 [{constants.IDISK_SIZE: disk.size}
4621 for disk in inst.disks]))
4622 for inst in instance_list)
4626 if query.IQ_CONSOLE in self.requested_data:
4627 consinfo = {}
4628 for inst in instance_list:
4629 if inst.name in live_data:
4630 # Instance is running
4631 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4632 else:
4633 consinfo[inst.name] = None
4634 assert set(consinfo.keys()) == set(instance_names)
4638 if query.IQ_NODES in self.requested_data:
4639 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4641 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4642 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4643 for uuid in set(map(operator.attrgetter("group"),
4649 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4650 disk_usage, offline_nodes, bad_nodes,
4651 live_data, wrongnode_inst, consinfo,
4655 class LUQuery(NoHooksLU):
4656 """Query for resources/items of a certain kind.
4659 # pylint: disable-msg=W0142
4662 def CheckArguments(self):
4663 qcls = _GetQueryImplementation(self.op.what)
4665 self.impl = qcls(self.op.filter, self.op.fields, False)
4667 def ExpandNames(self):
4668 self.impl.ExpandNames(self)
4670 def DeclareLocks(self, level):
4671 self.impl.DeclareLocks(self, level)
4673 def Exec(self, feedback_fn):
4674 return self.impl.NewStyleQuery(self)
4677 class LUQueryFields(NoHooksLU):
4678 """Query for resources/items of a certain kind.
4681 # pylint: disable-msg=W0142
4684 def CheckArguments(self):
4685 self.qcls = _GetQueryImplementation(self.op.what)
4687 def ExpandNames(self):
4688 self.needed_locks = {}
4690 def Exec(self, feedback_fn):
4691 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4694 class LUNodeModifyStorage(NoHooksLU):
4695 """Logical unit for modifying a storage volume on a node.
4700 def CheckArguments(self):
4701 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4703 storage_type = self.op.storage_type
4705 try:
4706 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4707 except KeyError:
4708 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4709 " modified" % storage_type,
4712 diff = set(self.op.changes.keys()) - modifiable
4713 if diff:
4714 raise errors.OpPrereqError("The following fields can not be modified for"
4715 " storage units of type '%s': %r" %
4716 (storage_type, list(diff)),
4719 def ExpandNames(self):
4720 self.needed_locks = {
4721 locking.LEVEL_NODE: self.op.node_name,
4724 def Exec(self, feedback_fn):
4725 """Computes the list of nodes and their attributes.
4728 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4729 result = self.rpc.call_storage_modify(self.op.node_name,
4730 self.op.storage_type, st_args,
4731 self.op.name, self.op.changes)
4732 result.Raise("Failed to modify storage unit '%s' on %s" %
4733 (self.op.name, self.op.node_name))
4736 class LUNodeAdd(LogicalUnit):
4737 """Logical unit for adding node to the cluster.
4740 HPATH = "node-add"
4741 HTYPE = constants.HTYPE_NODE
4742 _NFLAGS = ["master_capable", "vm_capable"]
4744 def CheckArguments(self):
4745 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4746 # validate/normalize the node name
4747 self.hostname = netutils.GetHostname(name=self.op.node_name,
4748 family=self.primary_ip_family)
4749 self.op.node_name = self.hostname.name
4751 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4752 raise errors.OpPrereqError("Cannot readd the master node",
4755 if self.op.readd and self.op.group:
4756 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4757 " being readded", errors.ECODE_INVAL)
4759 def BuildHooksEnv(self):
4762 This will run on all nodes before, and on all nodes + the new node after.
4766 "OP_TARGET": self.op.node_name,
4767 "NODE_NAME": self.op.node_name,
4768 "NODE_PIP": self.op.primary_ip,
4769 "NODE_SIP": self.op.secondary_ip,
4770 "MASTER_CAPABLE": str(self.op.master_capable),
4771 "VM_CAPABLE": str(self.op.vm_capable),
4774 def BuildHooksNodes(self):
4775 """Build hooks nodes.
4778 # Exclude added node
4779 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4780 post_nodes = pre_nodes + [self.op.node_name, ]
4782 return (pre_nodes, post_nodes)
4784 def CheckPrereq(self):
4785 """Check prerequisites.
4788 - the new node is not already in the config
4790 - its parameters (single/dual homed) match the cluster
4792 Any errors are signaled by raising errors.OpPrereqError.
4796 hostname = self.hostname
4797 node = hostname.name
4798 primary_ip = self.op.primary_ip = hostname.ip
4799 if self.op.secondary_ip is None:
4800 if self.primary_ip_family == netutils.IP6Address.family:
4801 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
4802 " IPv4 address must be given as secondary",
4804 self.op.secondary_ip = primary_ip
4806 secondary_ip = self.op.secondary_ip
4807 if not netutils.IP4Address.IsValid(secondary_ip):
4808 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4809 " address" % secondary_ip, errors.ECODE_INVAL)
4811 node_list = cfg.GetNodeList()
4812 if not self.op.readd and node in node_list:
4813 raise errors.OpPrereqError("Node %s is already in the configuration" %
4814 node, errors.ECODE_EXISTS)
4815 elif self.op.readd and node not in node_list:
4816 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4819 self.changed_primary_ip = False
4821 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4822 if self.op.readd and node == existing_node_name:
4823 if existing_node.secondary_ip != secondary_ip:
4824 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4825 " address configuration as before",
4827 if existing_node.primary_ip != primary_ip:
4828 self.changed_primary_ip = True
4832 if (existing_node.primary_ip == primary_ip or
4833 existing_node.secondary_ip == primary_ip or
4834 existing_node.primary_ip == secondary_ip or
4835 existing_node.secondary_ip == secondary_ip):
4836 raise errors.OpPrereqError("New node ip address(es) conflict with"
4837 " existing node %s" % existing_node.name,
4838 errors.ECODE_NOTUNIQUE)
4840 # After this 'if' block, None is no longer a valid value for the
4841 # _capable op attributes
4842 if self.op.readd:
4843 old_node = self.cfg.GetNodeInfo(node)
4844 assert old_node is not None, "Can't retrieve locked node %s" % node
4845 for attr in self._NFLAGS:
4846 if getattr(self.op, attr) is None:
4847 setattr(self.op, attr, getattr(old_node, attr))
4848 else:
4849 for attr in self._NFLAGS:
4850 if getattr(self.op, attr) is None:
4851 setattr(self.op, attr, True)
4853 if self.op.readd and not self.op.vm_capable:
4854 pri, sec = cfg.GetNodeInstances(node)
4855 if pri or sec:
4856 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4857 " flag set to false, but it already holds"
4858 " instances" % node,
4861 # check that the type of the node (single versus dual homed) is the
4862 # same as for the master
4863 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4864 master_singlehomed = myself.secondary_ip == myself.primary_ip
4865 newbie_singlehomed = secondary_ip == primary_ip
4866 if master_singlehomed != newbie_singlehomed:
4867 if master_singlehomed:
4868 raise errors.OpPrereqError("The master has no secondary ip but the"
4869 " new node has one",
4872 raise errors.OpPrereqError("The master has a secondary ip but the"
4873 " new node doesn't have one",
4876 # checks reachability
4877 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4878 raise errors.OpPrereqError("Node not reachable by ping",
4879 errors.ECODE_ENVIRON)
4881 if not newbie_singlehomed:
4882 # check reachability from my secondary ip to newbie's secondary ip
4883 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4884 source=myself.secondary_ip):
4885 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4886 " based ping to node daemon port",
4887 errors.ECODE_ENVIRON)
4894 if self.op.master_capable:
4895 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4896 else:
4897 self.master_candidate = False
4899 if self.op.readd:
4900 self.new_node = old_node
4901 else:
4902 node_group = cfg.LookupNodeGroup(self.op.group)
4903 self.new_node = objects.Node(name=node,
4904 primary_ip=primary_ip,
4905 secondary_ip=secondary_ip,
4906 master_candidate=self.master_candidate,
4907 offline=False, drained=False,
4908 group=node_group)
4910 if self.op.ndparams:
4911 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4913 def Exec(self, feedback_fn):
4914 """Adds the new node to the cluster.
4917 new_node = self.new_node
4918 node = new_node.name
4920 # We are adding a new node, so we assume it's powered
4921 new_node.powered = True
4923 # for re-adds, reset the offline/drained/master-candidate flags;
4924 # we need to reset here, otherwise offline would prevent RPC calls
4925 # later in the procedure; this also means that if the re-add
4926 # fails, we are left with a non-offlined, broken node
4928 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4929 self.LogInfo("Readding a node, the offline/drained flags were reset")
4930 # if we demote the node, we do cleanup later in the procedure
4931 new_node.master_candidate = self.master_candidate
4932 if self.changed_primary_ip:
4933 new_node.primary_ip = self.op.primary_ip
4935 # copy the master/vm_capable flags
4936 for attr in self._NFLAGS:
4937 setattr(new_node, attr, getattr(self.op, attr))
4939 # notify the user about any possible mc promotion
4940 if new_node.master_candidate:
4941 self.LogInfo("Node will be a master candidate")
4943 if self.op.ndparams:
4944 new_node.ndparams = self.op.ndparams
4946 new_node.ndparams = {}
4948 # check connectivity
4949 result = self.rpc.call_version([node])[node]
4950 result.Raise("Can't get version information from node %s" % node)
4951 if constants.PROTOCOL_VERSION == result.payload:
4952 logging.info("Communication to node %s fine, sw version %s match",
4953 node, result.payload)
4955 raise errors.OpExecError("Version mismatch master version %s,"
4956 " node version %s" %
4957 (constants.PROTOCOL_VERSION, result.payload))
4959 # Add node to our /etc/hosts, and add key to known_hosts
4960 if self.cfg.GetClusterInfo().modify_etc_hosts:
4961 master_node = self.cfg.GetMasterNode()
4962 result = self.rpc.call_etc_hosts_modify(master_node,
4963 constants.ETC_HOSTS_ADD,
4966 result.Raise("Can't update hosts file with new host data")
4968 if new_node.secondary_ip != new_node.primary_ip:
4969 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4972 node_verify_list = [self.cfg.GetMasterNode()]
4973 node_verify_param = {
4974 constants.NV_NODELIST: [node],
4975 # TODO: do a node-net-test as well?
4978 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4979 self.cfg.GetClusterName())
4980 for verifier in node_verify_list:
4981 result[verifier].Raise("Cannot communicate with node %s" % verifier)
4982 nl_payload = result[verifier].payload[constants.NV_NODELIST]
4984 for failed in nl_payload:
4985 feedback_fn("ssh/hostname verification failed"
4986 " (checking from %s): %s" %
4987 (verifier, nl_payload[failed]))
4988 raise errors.OpExecError("ssh/hostname verification failed")
4990 if self.op.readd:
4991 _RedistributeAncillaryFiles(self)
4992 self.context.ReaddNode(new_node)
4993 # make sure we redistribute the config
4994 self.cfg.Update(new_node, feedback_fn)
4995 # and make sure the new node will not have old files around
4996 if not new_node.master_candidate:
4997 result = self.rpc.call_node_demote_from_mc(new_node.name)
4998 msg = result.fail_msg
4999 if msg:
5000 self.LogWarning("Node failed to demote itself from master"
5001 " candidate status: %s" % msg)
5002 else:
5003 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5004 additional_vm=self.op.vm_capable)
5005 self.context.AddNode(new_node, self.proc.GetECId())
5008 class LUNodeSetParams(LogicalUnit):
5009 """Modifies the parameters of a node.
5011 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5012 to the node role (as _ROLE_*)
5013 @cvar _R2F: a dictionary from node role to tuples of flags
5014 @cvar _FLAGS: a list of attribute names corresponding to the flags
5017 HPATH = "node-modify"
5018 HTYPE = constants.HTYPE_NODE
5020 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5021 _F2R = {
5022 (True, False, False): _ROLE_CANDIDATE,
5023 (False, True, False): _ROLE_DRAINED,
5024 (False, False, True): _ROLE_OFFLINE,
5025 (False, False, False): _ROLE_REGULAR,
5026 }
5027 _R2F = dict((v, k) for k, v in _F2R.items())
5028 _FLAGS = ["master_candidate", "drained", "offline"]
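# Illustrative sketch (not part of the original module): _F2R maps the
# (master_candidate, drained, offline) flag tuple to a single role and _R2F is
# its inverse, so a node is always in exactly one of the four roles.  A small
# stand-alone demonstration of that round trip, using local copies of the
# tables:
def _example_flags_to_role():
  (ROLE_CANDIDATE, ROLE_DRAINED, ROLE_OFFLINE, ROLE_REGULAR) = range(4)
  f2r = {
    (True, False, False): ROLE_CANDIDATE,
    (False, True, False): ROLE_DRAINED,
    (False, False, True): ROLE_OFFLINE,
    (False, False, False): ROLE_REGULAR,
    }
  r2f = dict((v, k) for k, v in f2r.items())
  # A drained node maps to ROLE_DRAINED and back to its flag tuple
  assert f2r[(False, True, False)] == ROLE_DRAINED
  assert r2f[ROLE_DRAINED] == (False, True, False)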
5030 def CheckArguments(self):
5031 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5032 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5033 self.op.master_capable, self.op.vm_capable,
5034 self.op.secondary_ip, self.op.ndparams]
5035 if all_mods.count(None) == len(all_mods):
5036 raise errors.OpPrereqError("Please pass at least one modification",
5038 if all_mods.count(True) > 1:
5039 raise errors.OpPrereqError("Can't set the node into more than one"
5040 " state at the same time",
5043 # Boolean value that tells us whether we might be demoting from MC
5044 self.might_demote = (self.op.master_candidate == False or
5045 self.op.offline == True or
5046 self.op.drained == True or
5047 self.op.master_capable == False)
5049 if self.op.secondary_ip:
5050 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5051 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5052 " address" % self.op.secondary_ip,
5055 self.lock_all = self.op.auto_promote and self.might_demote
5056 self.lock_instances = self.op.secondary_ip is not None
5058 def ExpandNames(self):
5059 if self.lock_all:
5060 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5061 else:
5062 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5064 if self.lock_instances:
5065 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5067 def DeclareLocks(self, level):
5068 # If we have locked all instances, before waiting to lock nodes, release
5069 # all the ones living on nodes unrelated to the current operation.
5070 if level == locking.LEVEL_NODE and self.lock_instances:
5071 self.affected_instances = []
5072 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5073 instances_keep = []
5075 # Build list of instances to release
5076 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5077 for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5078 if (instance.disk_template in constants.DTS_INT_MIRROR and
5079 self.op.node_name in instance.all_nodes):
5080 instances_keep.append(instance_name)
5081 self.affected_instances.append(instance)
5083 _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5085 assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5086 set(instances_keep))
5088 def BuildHooksEnv(self):
5091 This runs on the master node.
5095 "OP_TARGET": self.op.node_name,
5096 "MASTER_CANDIDATE": str(self.op.master_candidate),
5097 "OFFLINE": str(self.op.offline),
5098 "DRAINED": str(self.op.drained),
5099 "MASTER_CAPABLE": str(self.op.master_capable),
5100 "VM_CAPABLE": str(self.op.vm_capable),
5103 def BuildHooksNodes(self):
5104 """Build hooks nodes.
5107 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5110 def CheckPrereq(self):
5111 """Check prerequisites.
5113 This only checks the instance list against the existing names.
5116 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5118 if (self.op.master_candidate is not None or
5119 self.op.drained is not None or
5120 self.op.offline is not None):
5121 # we can't change the master's node flags
5122 if self.op.node_name == self.cfg.GetMasterNode():
5123 raise errors.OpPrereqError("The master role can be changed"
5124 " only via master-failover",
5127 if self.op.master_candidate and not node.master_capable:
5128 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5129 " it a master candidate" % node.name,
5132 if self.op.vm_capable == False:
5133 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5134 if ipri or isec:
5135 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5136 " the vm_capable flag" % node.name,
5139 if node.master_candidate and self.might_demote and not self.lock_all:
5140 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5141 # check if after removing the current node, we're missing master
5142 # candidates
5143 (mc_remaining, mc_should, _) = \
5144 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5145 if mc_remaining < mc_should:
5146 raise errors.OpPrereqError("Not enough master candidates, please"
5147 " pass auto promote option to allow"
5148 " promotion", errors.ECODE_STATE)
5150 self.old_flags = old_flags = (node.master_candidate,
5151 node.drained, node.offline)
5152 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5153 self.old_role = old_role = self._F2R[old_flags]
5155 # Check for ineffective changes
5156 for attr in self._FLAGS:
5157 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5158 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5159 setattr(self.op, attr, None)
5161 # Past this point, any flag change to False means a transition
5162 # away from the respective state, as only real changes are kept
5164 # TODO: We might query the real power state if it supports OOB
5165 if _SupportsOob(self.cfg, node):
5166 if self.op.offline is False and not (node.powered or
5167 self.op.powered == True):
5168 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5169 " offline status can be reset") %
5171 elif self.op.powered is not None:
5172 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5173 " as it does not support out-of-band"
5174 " handling") % self.op.node_name)
5176 # If we're being deofflined/drained, we'll MC ourself if needed
5177 if (self.op.drained == False or self.op.offline == False or
5178 (self.op.master_capable and not node.master_capable)):
5179 if _DecideSelfPromotion(self):
5180 self.op.master_candidate = True
5181 self.LogInfo("Auto-promoting node to master candidate")
5183 # If we're no longer master capable, we'll demote ourselves from MC
5184 if self.op.master_capable == False and node.master_candidate:
5185 self.LogInfo("Demoting from master candidate")
5186 self.op.master_candidate = False
5189 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5190 if self.op.master_candidate:
5191 new_role = self._ROLE_CANDIDATE
5192 elif self.op.drained:
5193 new_role = self._ROLE_DRAINED
5194 elif self.op.offline:
5195 new_role = self._ROLE_OFFLINE
5196 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5197 # False is still in new flags, which means we're un-setting (the
5198 # only) True flag
5199 new_role = self._ROLE_REGULAR
5200 else: # no new flags, nothing, keep old role
5201 new_role = old_role
5203 self.new_role = new_role
5205 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5206 # Trying to transition out of offline status
5207 result = self.rpc.call_version([node.name])[node.name]
5208 if result.fail_msg:
5209 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5210 " to report its version: %s" %
5211 (node.name, result.fail_msg),
5214 self.LogWarning("Transitioning node from offline to online state"
5215 " without using re-add. Please make sure the node"
5218 if self.op.secondary_ip:
5219 # Ok even without locking, because this can't be changed by any LU
5220 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5221 master_singlehomed = master.secondary_ip == master.primary_ip
5222 if master_singlehomed and self.op.secondary_ip:
5223 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5224 " homed cluster", errors.ECODE_INVAL)
5226 if node.offline:
5227 if self.affected_instances:
5228 raise errors.OpPrereqError("Cannot change secondary ip: offline"
5229 " node has instances (%s) configured"
5230 " to use it" % self.affected_instances)
5231 else:
5232 # On online nodes, check that no instances are running, and that
5233 # the node has the new ip and we can reach it.
5234 for instance in self.affected_instances:
5235 _CheckInstanceDown(self, instance, "cannot change secondary ip")
5237 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5238 if master.name != node.name:
5239 # check reachability from master secondary ip to new secondary ip
5240 if not netutils.TcpPing(self.op.secondary_ip,
5241 constants.DEFAULT_NODED_PORT,
5242 source=master.secondary_ip):
5243 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5244 " based ping to node daemon port",
5245 errors.ECODE_ENVIRON)
5247 if self.op.ndparams:
5248 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5249 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5250 self.new_ndparams = new_ndparams
5252 def Exec(self, feedback_fn):
5253 """Modifies a node.
5255 """
5256 node = self.node
5257 old_role = self.old_role
5258 new_role = self.new_role
5260 result = []
5262 if self.op.ndparams:
5263 node.ndparams = self.new_ndparams
5265 if self.op.powered is not None:
5266 node.powered = self.op.powered
5268 for attr in ["master_capable", "vm_capable"]:
5269 val = getattr(self.op, attr)
5270 if val is not None:
5271 setattr(node, attr, val)
5272 result.append((attr, str(val)))
5274 if new_role != old_role:
5275 # Tell the node to demote itself, if no longer MC and not offline
5276 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5277 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5278 if msg:
5279 self.LogWarning("Node failed to demote itself: %s", msg)
5281 new_flags = self._R2F[new_role]
5282 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5283 if of != nf:
5284 result.append((desc, str(nf)))
5285 (node.master_candidate, node.drained, node.offline) = new_flags
5287 # we locked all nodes, we adjust the CP before updating this node
5288 if self.lock_all:
5289 _AdjustCandidatePool(self, [node.name])
5291 if self.op.secondary_ip:
5292 node.secondary_ip = self.op.secondary_ip
5293 result.append(("secondary_ip", self.op.secondary_ip))
5295 # this will trigger configuration file update, if needed
5296 self.cfg.Update(node, feedback_fn)
5298 # this will trigger job queue propagation or cleanup if the mc
5299 # flag changed
5300 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5301 self.context.ReaddNode(node)
5303 return result
5306 class LUNodePowercycle(NoHooksLU):
5307 """Powercycles a node.
5312 def CheckArguments(self):
5313 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5314 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5315 raise errors.OpPrereqError("The node is the master and the force"
5316 " parameter was not set",
5319 def ExpandNames(self):
5320 """Locking for PowercycleNode.
5322 This is a last-resort option and shouldn't block on other
5323 jobs. Therefore, we grab no locks.
5326 self.needed_locks = {}
5328 def Exec(self, feedback_fn):
5332 result = self.rpc.call_node_powercycle(self.op.node_name,
5333 self.cfg.GetHypervisorType())
5334 result.Raise("Failed to schedule the reboot")
5335 return result.payload
5338 class LUClusterQuery(NoHooksLU):
5339 """Query cluster configuration.
5344 def ExpandNames(self):
5345 self.needed_locks = {}
5347 def Exec(self, feedback_fn):
5348 """Return cluster config.
5351 cluster = self.cfg.GetClusterInfo()
5352 os_hvp = {}
5354 # Filter just for enabled hypervisors
5355 for os_name, hv_dict in cluster.os_hvp.items():
5356 os_hvp[os_name] = {}
5357 for hv_name, hv_params in hv_dict.items():
5358 if hv_name in cluster.enabled_hypervisors:
5359 os_hvp[os_name][hv_name] = hv_params
5361 # Convert ip_family to ip_version
5362 primary_ip_version = constants.IP4_VERSION
5363 if cluster.primary_ip_family == netutils.IP6Address.family:
5364 primary_ip_version = constants.IP6_VERSION
5367 "software_version": constants.RELEASE_VERSION,
5368 "protocol_version": constants.PROTOCOL_VERSION,
5369 "config_version": constants.CONFIG_VERSION,
5370 "os_api_version": max(constants.OS_API_VERSIONS),
5371 "export_version": constants.EXPORT_VERSION,
5372 "architecture": (platform.architecture()[0], platform.machine()),
5373 "name": cluster.cluster_name,
5374 "master": cluster.master_node,
5375 "default_hypervisor": cluster.enabled_hypervisors[0],
5376 "enabled_hypervisors": cluster.enabled_hypervisors,
5377 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5378 for hypervisor_name in cluster.enabled_hypervisors]),
5380 "beparams": cluster.beparams,
5381 "osparams": cluster.osparams,
5382 "nicparams": cluster.nicparams,
5383 "ndparams": cluster.ndparams,
5384 "candidate_pool_size": cluster.candidate_pool_size,
5385 "master_netdev": cluster.master_netdev,
5386 "volume_group_name": cluster.volume_group_name,
5387 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5388 "file_storage_dir": cluster.file_storage_dir,
5389 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5390 "maintain_node_health": cluster.maintain_node_health,
5391 "ctime": cluster.ctime,
5392 "mtime": cluster.mtime,
5393 "uuid": cluster.uuid,
5394 "tags": list(cluster.GetTags()),
5395 "uid_pool": cluster.uid_pool,
5396 "default_iallocator": cluster.default_iallocator,
5397 "reserved_lvs": cluster.reserved_lvs,
5398 "primary_ip_version": primary_ip_version,
5399 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5400 "hidden_os": cluster.hidden_os,
5401 "blacklisted_os": cluster.blacklisted_os,
5407 class LUClusterConfigQuery(NoHooksLU):
5408 """Return configuration values.
5412 _FIELDS_DYNAMIC = utils.FieldSet()
5413 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5414 "watcher_pause", "volume_group_name")
5416 def CheckArguments(self):
5417 _CheckOutputFields(static=self._FIELDS_STATIC,
5418 dynamic=self._FIELDS_DYNAMIC,
5419 selected=self.op.output_fields)
5421 def ExpandNames(self):
5422 self.needed_locks = {}
5424 def Exec(self, feedback_fn):
5425 """Dump a representation of the cluster config to the standard output.
5428 values = []
5429 for field in self.op.output_fields:
5430 if field == "cluster_name":
5431 entry = self.cfg.GetClusterName()
5432 elif field == "master_node":
5433 entry = self.cfg.GetMasterNode()
5434 elif field == "drain_flag":
5435 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5436 elif field == "watcher_pause":
5437 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5438 elif field == "volume_group_name":
5439 entry = self.cfg.GetVGName()
5441 raise errors.ParameterError(field)
5442 values.append(entry)
5444 return values
5446 class LUInstanceActivateDisks(NoHooksLU):
5447 """Bring up an instance's disks.
5452 def ExpandNames(self):
5453 self._ExpandAndLockInstance()
5454 self.needed_locks[locking.LEVEL_NODE] = []
5455 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5457 def DeclareLocks(self, level):
5458 if level == locking.LEVEL_NODE:
5459 self._LockInstancesNodes()
5461 def CheckPrereq(self):
5462 """Check prerequisites.
5464 This checks that the instance is in the cluster.
5467 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5468 assert self.instance is not None, \
5469 "Cannot retrieve locked instance %s" % self.op.instance_name
5470 _CheckNodeOnline(self, self.instance.primary_node)
5472 def Exec(self, feedback_fn):
5473 """Activate the disks.
5476 disks_ok, disks_info = \
5477 _AssembleInstanceDisks(self, self.instance,
5478 ignore_size=self.op.ignore_size)
5479 if not disks_ok:
5480 raise errors.OpExecError("Cannot activate block devices")
5482 return disks_info
5485 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5486 ignore_size=False):
5487 """Prepare the block devices for an instance.
5489 This sets up the block devices on all nodes.
5491 @type lu: L{LogicalUnit}
5492 @param lu: the logical unit on whose behalf we execute
5493 @type instance: L{objects.Instance}
5494 @param instance: the instance for whose disks we assemble
5495 @type disks: list of L{objects.Disk} or None
5496 @param disks: which disks to assemble (or all, if None)
5497 @type ignore_secondaries: boolean
5498 @param ignore_secondaries: if true, errors on secondary nodes
5499 won't result in an error return from the function
5500 @type ignore_size: boolean
5501 @param ignore_size: if true, the current known size of the disk
5502 will not be used during the disk activation, useful for cases
5503 when the size is wrong
5504 @return: False if the operation failed, otherwise a list of
5505 (host, instance_visible_name, node_visible_name)
5506 with the mapping from node devices to instance devices
5508 """
5509 device_info = []
5510 disks_ok = True
5511 iname = instance.name
5512 disks = _ExpandCheckDisks(instance, disks)
5514 # With the two-pass mechanism we try to reduce the window of
5515 # opportunity for the race condition of switching DRBD to primary
5516 # before handshaking occurred, but we do not eliminate it
5518 # The proper fix would be to wait (with some limits) until the
5519 # connection has been made and drbd transitions from WFConnection
5520 # into any other network-connected state (Connected, SyncTarget,
5521 # SyncSource, etc.)
5523 # 1st pass, assemble on all nodes in secondary mode
5524 for idx, inst_disk in enumerate(disks):
5525 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5526 if ignore_size:
5527 node_disk = node_disk.Copy()
5528 node_disk.UnsetSize()
5529 lu.cfg.SetDiskID(node_disk, node)
5530 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5531 msg = result.fail_msg
5532 if msg:
5533 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5534 " (is_primary=False, pass=1): %s",
5535 inst_disk.iv_name, node, msg)
5536 if not ignore_secondaries:
5537 disks_ok = False
5539 # FIXME: race condition on drbd migration to primary
5541 # 2nd pass, do only the primary node
5542 for idx, inst_disk in enumerate(disks):
5545 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5546 if node != instance.primary_node:
5547 continue
5548 if ignore_size:
5549 node_disk = node_disk.Copy()
5550 node_disk.UnsetSize()
5551 lu.cfg.SetDiskID(node_disk, node)
5552 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5553 msg = result.fail_msg
5554 if msg:
5555 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5556 " (is_primary=True, pass=2): %s",
5557 inst_disk.iv_name, node, msg)
5558 disks_ok = False
5559 else:
5560 dev_path = result.payload
5562 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5564 # leave the disks configured for the primary node
5565 # this is a workaround that would be fixed better by
5566 # improving the logical/physical id handling
5567 for disk in disks:
5568 lu.cfg.SetDiskID(disk, instance.primary_node)
5570 return disks_ok, device_info
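# Illustrative sketch (not part of the original module): the helper above
# assembles every disk twice, first on all nodes in secondary mode and only
# then on the primary node, to narrow the DRBD primary-before-handshake race.
# A stand-alone skeleton of that ordering; assemble_fn and the disk layout are
# hypothetical, caller-supplied pieces:
def _example_two_pass_assemble(disks, primary_node, assemble_fn):
  ok = True
  # 1st pass: secondary mode on every node holding the disk
  for disk in disks:
    for node in disk["nodes"]:
      ok = assemble_fn(node, disk, as_primary=False) and ok
  # 2nd pass: primary node only, in primary mode
  for disk in disks:
    ok = assemble_fn(primary_node, disk, as_primary=True) and ok
  return ok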
5573 def _StartInstanceDisks(lu, instance, force):
5574 """Start the disks of an instance.
5577 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5578 ignore_secondaries=force)
5579 if not disks_ok:
5580 _ShutdownInstanceDisks(lu, instance)
5581 if force is not None and not force:
5582 lu.proc.LogWarning("", hint="If the message above refers to a"
5583 " secondary node,"
5584 " you can retry the operation using '--force'.")
5585 raise errors.OpExecError("Disk consistency error")
5588 class LUInstanceDeactivateDisks(NoHooksLU):
5589 """Shutdown an instance's disks.
5594 def ExpandNames(self):
5595 self._ExpandAndLockInstance()
5596 self.needed_locks[locking.LEVEL_NODE] = []
5597 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5599 def DeclareLocks(self, level):
5600 if level == locking.LEVEL_NODE:
5601 self._LockInstancesNodes()
5603 def CheckPrereq(self):
5604 """Check prerequisites.
5606 This checks that the instance is in the cluster.
5609 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5610 assert self.instance is not None, \
5611 "Cannot retrieve locked instance %s" % self.op.instance_name
5613 def Exec(self, feedback_fn):
5614 """Deactivate the disks
5617 instance = self.instance
5618 if self.op.force:
5619 _ShutdownInstanceDisks(self, instance)
5620 else:
5621 _SafeShutdownInstanceDisks(self, instance)
5624 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5625 """Shutdown block devices of an instance.
5627 This function checks if an instance is running, before calling
5628 _ShutdownInstanceDisks.
5631 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5632 _ShutdownInstanceDisks(lu, instance, disks=disks)
5635 def _ExpandCheckDisks(instance, disks):
5636 """Return the instance disks selected by the disks list
5638 @type disks: list of L{objects.Disk} or None
5639 @param disks: selected disks
5640 @rtype: list of L{objects.Disk}
5641 @return: selected instance disks to act on
5645 return instance.disks
5647 if not set(disks).issubset(instance.disks):
5648 raise errors.ProgrammerError("Can only act on disks belonging to the"
5653 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5654 """Shutdown block devices of an instance.
5656 This does the shutdown on all nodes of the instance.
5658 If ignore_primary is false, an error on the primary node makes the whole
5659 shutdown report failure; errors on offline nodes are ignored.
5663 disks = _ExpandCheckDisks(instance, disks)
5666 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5667 lu.cfg.SetDiskID(top_disk, node)
5668 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5669 msg = result.fail_msg
5671 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5672 disk.iv_name, node, msg)
5673 if ((node == instance.primary_node and not ignore_primary) or
5674 (node != instance.primary_node and not result.offline)):
5679 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5680 """Checks if a node has enough free memory.
5682 This function checks if a given node has the needed amount of free
5683 memory. In case the node has less memory or we cannot get the
5684 information from the node, this function raises an OpPrereqError
5685 exception.
5687 @type lu: C{LogicalUnit}
5688 @param lu: a logical unit from which we get configuration data
5690 @param node: the node to check
5691 @type reason: C{str}
5692 @param reason: string to use in the error message
5693 @type requested: C{int}
5694 @param requested: the amount of memory in MiB to check for
5695 @type hypervisor_name: C{str}
5696 @param hypervisor_name: the hypervisor to ask for memory stats
5697 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5698 we cannot check the node
5701 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5702 nodeinfo[node].Raise("Can't get data from node %s" % node,
5703 prereq=True, ecode=errors.ECODE_ENVIRON)
5704 free_mem = nodeinfo[node].payload.get("memory_free", None)
5705 if not isinstance(free_mem, int):
5706 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5707 " was '%s'" % (node, free_mem),
5708 errors.ECODE_ENVIRON)
5709 if requested > free_mem:
5710 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5711 " needed %s MiB, available %s MiB" %
5712 (node, reason, requested, free_mem),
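# Illustrative use (sketch, mirroring LUInstanceStartup.CheckPrereq below):
# before starting an instance, its LU checks that the primary node can host
# the configured memory, e.g.:
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)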
5716 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5717 """Checks if nodes have enough free disk space in the all VGs.
5719 This function checks if all given nodes have the needed amount of
5720 free disk. In case any node has less disk or we cannot get the
5721 information from the node, this function raises an OpPrereqError
5722 exception.
5724 @type lu: C{LogicalUnit}
5725 @param lu: a logical unit from which we get configuration data
5726 @type nodenames: C{list}
5727 @param nodenames: the list of node names to check
5728 @type req_sizes: C{dict}
5729 @param req_sizes: the hash of vg and corresponding amount of disk in
5730 MiB to check for
5731 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5732 or we cannot check the node
5735 for vg, req_size in req_sizes.items():
5736 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
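# Illustrative req_sizes value (hypothetical volume groups and sizes): a dict
# mapping each VG name to the total MiB requested from it, e.g.
# {"xenvg": 10240, "fastvg": 2048}; every entry is verified on all nodes in
# nodenames via _CheckNodesFreeDiskOnVG below.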
5739 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5740 """Checks if nodes have enough free disk space in the specified VG.
5742 This function checks if all given nodes have the needed amount of
5743 free disk. In case any node has less disk or we cannot get the
5744 information from the node, this function raises an OpPrereqError
5745 exception.
5747 @type lu: C{LogicalUnit}
5748 @param lu: a logical unit from which we get configuration data
5749 @type nodenames: C{list}
5750 @param nodenames: the list of node names to check
5752 @param vg: the volume group to check
5753 @type requested: C{int}
5754 @param requested: the amount of disk in MiB to check for
5755 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5756 or we cannot check the node
5759 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5760 for node in nodenames:
5761 info = nodeinfo[node]
5762 info.Raise("Cannot get current information from node %s" % node,
5763 prereq=True, ecode=errors.ECODE_ENVIRON)
5764 vg_free = info.payload.get("vg_free", None)
5765 if not isinstance(vg_free, int):
5766 raise errors.OpPrereqError("Can't compute free disk space on node"
5767 " %s for vg %s, result was '%s'" %
5768 (node, vg, vg_free), errors.ECODE_ENVIRON)
5769 if requested > vg_free:
5770 raise errors.OpPrereqError("Not enough disk space on target node %s"
5771 " vg %s: required %d MiB, available %d MiB" %
5772 (node, vg, requested, vg_free),
5776 class LUInstanceStartup(LogicalUnit):
5777 """Starts an instance.
5780 HPATH = "instance-start"
5781 HTYPE = constants.HTYPE_INSTANCE
5784 def CheckArguments(self):
5786 if self.op.beparams:
5787 # fill the beparams dict
5788 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5790 def ExpandNames(self):
5791 self._ExpandAndLockInstance()
5793 def BuildHooksEnv(self):
5796 This runs on master, primary and secondary nodes of the instance.
5800 "FORCE": self.op.force,
5803 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5807 def BuildHooksNodes(self):
5808 """Build hooks nodes.
5811 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5814 def CheckPrereq(self):
5815 """Check prerequisites.
5817 This checks that the instance is in the cluster.
5820 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5821 assert self.instance is not None, \
5822 "Cannot retrieve locked instance %s" % self.op.instance_name
5825 if self.op.hvparams:
5826 # check hypervisor parameter syntax (locally)
5827 cluster = self.cfg.GetClusterInfo()
5828 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5829 filled_hvp = cluster.FillHV(instance)
5830 filled_hvp.update(self.op.hvparams)
5831 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5832 hv_type.CheckParameterSyntax(filled_hvp)
5833 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5835 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5837 if self.primary_offline and self.op.ignore_offline_nodes:
5838 self.proc.LogWarning("Ignoring offline primary node")
5840 if self.op.hvparams or self.op.beparams:
5841 self.proc.LogWarning("Overridden parameters are ignored")
5843 _CheckNodeOnline(self, instance.primary_node)
5845 bep = self.cfg.GetClusterInfo().FillBE(instance)
5847 # check bridges existence
5848 _CheckInstanceBridgesExist(self, instance)
5850 remote_info = self.rpc.call_instance_info(instance.primary_node,
5852 instance.hypervisor)
5853 remote_info.Raise("Error checking node %s" % instance.primary_node,
5854 prereq=True, ecode=errors.ECODE_ENVIRON)
5855 if not remote_info.payload: # not running already
5856 _CheckNodeFreeMemory(self, instance.primary_node,
5857 "starting instance %s" % instance.name,
5858 bep[constants.BE_MEMORY], instance.hypervisor)
5860 def Exec(self, feedback_fn):
5861 """Start the instance.
5864 instance = self.instance
5865 force = self.op.force
5867 if not self.op.no_remember:
5868 self.cfg.MarkInstanceUp(instance.name)
5870 if self.primary_offline:
5871 assert self.op.ignore_offline_nodes
5872 self.proc.LogInfo("Primary node offline, marked instance as started")
5874 node_current = instance.primary_node
5876 _StartInstanceDisks(self, instance, force)
5878 result = self.rpc.call_instance_start(node_current, instance,
5879 self.op.hvparams, self.op.beparams,
5880 self.op.startup_paused)
5881 msg = result.fail_msg
5883 _ShutdownInstanceDisks(self, instance)
5884 raise errors.OpExecError("Could not start instance: %s" % msg)
5887 class LUInstanceReboot(LogicalUnit):
5888 """Reboot an instance.
5891 HPATH = "instance-reboot"
5892 HTYPE = constants.HTYPE_INSTANCE
5895 def ExpandNames(self):
5896 self._ExpandAndLockInstance()
5898 def BuildHooksEnv(self):
5901 This runs on master, primary and secondary nodes of the instance.
5905 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5906 "REBOOT_TYPE": self.op.reboot_type,
5907 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5910 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5914 def BuildHooksNodes(self):
5915 """Build hooks nodes.
5918 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5921 def CheckPrereq(self):
5922 """Check prerequisites.
5924 This checks that the instance is in the cluster.
5927 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5928 assert self.instance is not None, \
5929 "Cannot retrieve locked instance %s" % self.op.instance_name
5931 _CheckNodeOnline(self, instance.primary_node)
5933 # check bridges existence
5934 _CheckInstanceBridgesExist(self, instance)
5936 def Exec(self, feedback_fn):
5937 """Reboot the instance.
5940 instance = self.instance
5941 ignore_secondaries = self.op.ignore_secondaries
5942 reboot_type = self.op.reboot_type
5944 remote_info = self.rpc.call_instance_info(instance.primary_node,
5946 instance.hypervisor)
5947 remote_info.Raise("Error checking node %s" % instance.primary_node)
5948 instance_running = bool(remote_info.payload)
5950 node_current = instance.primary_node
5952 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5953 constants.INSTANCE_REBOOT_HARD]:
5954 for disk in instance.disks:
5955 self.cfg.SetDiskID(disk, node_current)
5956 result = self.rpc.call_instance_reboot(node_current, instance,
5958 self.op.shutdown_timeout)
5959 result.Raise("Could not reboot instance")
5961 if instance_running:
5962 result = self.rpc.call_instance_shutdown(node_current, instance,
5963 self.op.shutdown_timeout)
5964 result.Raise("Could not shutdown instance for full reboot")
5965 _ShutdownInstanceDisks(self, instance)
5967 self.LogInfo("Instance %s was already stopped, starting now",
5969 _StartInstanceDisks(self, instance, ignore_secondaries)
5970 result = self.rpc.call_instance_start(node_current, instance,
5972 msg = result.fail_msg
5974 _ShutdownInstanceDisks(self, instance)
5975 raise errors.OpExecError("Could not start instance for"
5976 " full reboot: %s" % msg)
5978 self.cfg.MarkInstanceUp(instance.name)
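# Reboot semantics (descriptive summary of the code above): soft and hard
# reboots are delegated to the hypervisor via call_instance_reboot, while a
# full reboot is emulated by shutting the instance down (including its disks)
# and starting it again; in all cases the instance ends up marked as up in
# the configuration.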
5981 class LUInstanceShutdown(LogicalUnit):
5982 """Shutdown an instance.
5985 HPATH = "instance-stop"
5986 HTYPE = constants.HTYPE_INSTANCE
5989 def ExpandNames(self):
5990 self._ExpandAndLockInstance()
5992 def BuildHooksEnv(self):
5995 This runs on master, primary and secondary nodes of the instance.
5998 env = _BuildInstanceHookEnvByObject(self, self.instance)
5999 env["TIMEOUT"] = self.op.timeout
6002 def BuildHooksNodes(self):
6003 """Build hooks nodes.
6006 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6009 def CheckPrereq(self):
6010 """Check prerequisites.
6012 This checks that the instance is in the cluster.
6015 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6016 assert self.instance is not None, \
6017 "Cannot retrieve locked instance %s" % self.op.instance_name
6019 self.primary_offline = \
6020 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6022 if self.primary_offline and self.op.ignore_offline_nodes:
6023 self.proc.LogWarning("Ignoring offline primary node")
6025 _CheckNodeOnline(self, self.instance.primary_node)
6027 def Exec(self, feedback_fn):
6028 """Shutdown the instance.
6031 instance = self.instance
6032 node_current = instance.primary_node
6033 timeout = self.op.timeout
6035 if not self.op.no_remember:
6036 self.cfg.MarkInstanceDown(instance.name)
6038 if self.primary_offline:
6039 assert self.op.ignore_offline_nodes
6040 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6042 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6043 msg = result.fail_msg
6045 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6047 _ShutdownInstanceDisks(self, instance)
6050 class LUInstanceReinstall(LogicalUnit):
6051 """Reinstall an instance.
6054 HPATH = "instance-reinstall"
6055 HTYPE = constants.HTYPE_INSTANCE
6058 def ExpandNames(self):
6059 self._ExpandAndLockInstance()
6061 def BuildHooksEnv(self):
6064 This runs on master, primary and secondary nodes of the instance.
6067 return _BuildInstanceHookEnvByObject(self, self.instance)
6069 def BuildHooksNodes(self):
6070 """Build hooks nodes.
6073 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6076 def CheckPrereq(self):
6077 """Check prerequisites.
6079 This checks that the instance is in the cluster and is not running.
6082 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6083 assert instance is not None, \
6084 "Cannot retrieve locked instance %s" % self.op.instance_name
6085 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6086 " offline, cannot reinstall")
6087 for node in instance.secondary_nodes:
6088 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6089 " cannot reinstall")
6091 if instance.disk_template == constants.DT_DISKLESS:
6092 raise errors.OpPrereqError("Instance '%s' has no disks" %
6093 self.op.instance_name,
6095 _CheckInstanceDown(self, instance, "cannot reinstall")
6097 if self.op.os_type is not None:
6099 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6100 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6101 instance_os = self.op.os_type
6103 instance_os = instance.os
6105 nodelist = list(instance.all_nodes)
6107 if self.op.osparams:
6108 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6109 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6110 self.os_inst = i_osdict # the new dict (without defaults)
6114 self.instance = instance
6116 def Exec(self, feedback_fn):
6117 """Reinstall the instance.
6120 inst = self.instance
6122 if self.op.os_type is not None:
6123 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6124 inst.os = self.op.os_type
6125 # Write to configuration
6126 self.cfg.Update(inst, feedback_fn)
6128 _StartInstanceDisks(self, inst, None)
6130 feedback_fn("Running the instance OS create scripts...")
6131 # FIXME: pass debug option from opcode to backend
6132 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6133 self.op.debug_level,
6134 osparams=self.os_inst)
6135 result.Raise("Could not install OS for instance %s on node %s" %
6136 (inst.name, inst.primary_node))
6138 _ShutdownInstanceDisks(self, inst)
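# Reinstall flow (descriptive summary): the instance disks are activated with
# _StartInstanceDisks, the OS create scripts are run on the primary node via
# call_instance_os_add (optionally with a new OS type and osparams), and the
# disks are shut down again afterwards; the instance itself is never started.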
6141 class LUInstanceRecreateDisks(LogicalUnit):
6142 """Recreate an instance's missing disks.
6145 HPATH = "instance-recreate-disks"
6146 HTYPE = constants.HTYPE_INSTANCE
6149 def CheckArguments(self):
6150 # normalise the disk list
6151 self.op.disks = sorted(frozenset(self.op.disks))
6153 def ExpandNames(self):
6154 self._ExpandAndLockInstance()
6155 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6157 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6158 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6160 self.needed_locks[locking.LEVEL_NODE] = []
6162 def DeclareLocks(self, level):
6163 if level == locking.LEVEL_NODE:
6164 # if we replace the nodes, we only need to lock the old primary,
6165 # otherwise we need to lock all nodes for disk re-creation
6166 primary_only = bool(self.op.nodes)
6167 self._LockInstancesNodes(primary_only=primary_only)
6169 def BuildHooksEnv(self):
6172 This runs on master, primary and secondary nodes of the instance.
6175 return _BuildInstanceHookEnvByObject(self, self.instance)
6177 def BuildHooksNodes(self):
6178 """Build hooks nodes.
6181 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6184 def CheckPrereq(self):
6185 """Check prerequisites.
6187 This checks that the instance is in the cluster and is not running.
6190 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6191 assert instance is not None, \
6192 "Cannot retrieve locked instance %s" % self.op.instance_name
6194 if len(self.op.nodes) != len(instance.all_nodes):
6195 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6196 " %d replacement nodes were specified" %
6197 (instance.name, len(instance.all_nodes),
6198 len(self.op.nodes)),
6200 assert instance.disk_template != constants.DT_DRBD8 or \
6201 len(self.op.nodes) == 2
6202 assert instance.disk_template != constants.DT_PLAIN or \
6203 len(self.op.nodes) == 1
6204 primary_node = self.op.nodes[0]
6206 primary_node = instance.primary_node
6207 _CheckNodeOnline(self, primary_node)
6209 if instance.disk_template == constants.DT_DISKLESS:
6210 raise errors.OpPrereqError("Instance '%s' has no disks" %
6211 self.op.instance_name, errors.ECODE_INVAL)
6212 # if we replace nodes *and* the old primary is offline, we don't
6213 # check whether the instance is down
6214 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6215 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6216 if not (self.op.nodes and old_pnode.offline):
6217 _CheckInstanceDown(self, instance, "cannot recreate disks")
6219 if not self.op.disks:
6220 self.op.disks = range(len(instance.disks))
6222 for idx in self.op.disks:
6223 if idx >= len(instance.disks):
6224 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6226 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6227 raise errors.OpPrereqError("Can't recreate disks partially and"
6228 " change the nodes at the same time",
6230 self.instance = instance
6232 def Exec(self, feedback_fn):
6233 """Recreate the disks.
6236 instance = self.instance
6239 mods = [] # keeps track of needed logical_id changes
6241 for idx, disk in enumerate(instance.disks):
6242 if idx not in self.op.disks: # disk idx has not been passed in
6245 # update secondaries for disks, if needed
6247 if disk.dev_type == constants.LD_DRBD8:
6248 # need to update the nodes and minors
6249 assert len(self.op.nodes) == 2
6250 assert len(disk.logical_id) == 6 # otherwise disk internals
6252 (_, _, old_port, _, _, old_secret) = disk.logical_id
6253 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6254 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6255 new_minors[0], new_minors[1], old_secret)
6256 assert len(disk.logical_id) == len(new_id)
6257 mods.append((idx, new_id))
6259 # now that we have passed all asserts above, we can apply the mods
6260 # in a single run (to avoid partial changes)
6261 for idx, new_id in mods:
6262 instance.disks[idx].logical_id = new_id
6264 # change primary node, if needed
6266 instance.primary_node = self.op.nodes[0]
6267 self.LogWarning("Changing the instance's nodes, you will have to"
6268 " remove any disks left on the older nodes manually")
6271 self.cfg.Update(instance, feedback_fn)
6273 _CreateDisks(self, instance, to_skip=to_skip)
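# DRBD8 logical_id layout (descriptive note): the 6-tuple unpacked above is
# (node_a, node_b, port, minor_a, minor_b, shared_secret); when recreating
# disks on new nodes, only the nodes and the freshly allocated minors change,
# while the TCP port and the shared secret are reused.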
6276 class LUInstanceRename(LogicalUnit):
6277 """Rename an instance.
6280 HPATH = "instance-rename"
6281 HTYPE = constants.HTYPE_INSTANCE
6283 def CheckArguments(self):
6287 if self.op.ip_check and not self.op.name_check:
6288 # TODO: make the ip check more flexible and not depend on the name check
6289 raise errors.OpPrereqError("IP address check requires a name check",
6292 def BuildHooksEnv(self):
6295 This runs on master, primary and secondary nodes of the instance.
6298 env = _BuildInstanceHookEnvByObject(self, self.instance)
6299 env["INSTANCE_NEW_NAME"] = self.op.new_name
6302 def BuildHooksNodes(self):
6303 """Build hooks nodes.
6306 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6309 def CheckPrereq(self):
6310 """Check prerequisites.
6312 This checks that the instance is in the cluster and is not running.
6315 self.op.instance_name = _ExpandInstanceName(self.cfg,
6316 self.op.instance_name)
6317 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6318 assert instance is not None
6319 _CheckNodeOnline(self, instance.primary_node)
6320 _CheckInstanceDown(self, instance, "cannot rename")
6321 self.instance = instance
6323 new_name = self.op.new_name
6324 if self.op.name_check:
6325 hostname = netutils.GetHostname(name=new_name)
6326 if hostname != new_name:
6327 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6329 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6330 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6331 " same as given hostname '%s'") %
6332 (hostname.name, self.op.new_name),
6334 new_name = self.op.new_name = hostname.name
6335 if (self.op.ip_check and
6336 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6337 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6338 (hostname.ip, new_name),
6339 errors.ECODE_NOTUNIQUE)
6341 instance_list = self.cfg.GetInstanceList()
6342 if new_name in instance_list and new_name != instance.name:
6343 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6344 new_name, errors.ECODE_EXISTS)
6346 def Exec(self, feedback_fn):
6347 """Rename the instance.
6350 inst = self.instance
6351 old_name = inst.name
6353 rename_file_storage = False
6354 if (inst.disk_template in constants.DTS_FILEBASED and
6355 self.op.new_name != inst.name):
6356 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6357 rename_file_storage = True
6359 self.cfg.RenameInstance(inst.name, self.op.new_name)
6360 # Change the instance lock. This is definitely safe while we hold the BGL.
6361 # Otherwise the new lock would have to be added in acquired mode.
6363 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6364 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6366 # re-read the instance from the configuration after rename
6367 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6369 if rename_file_storage:
6370 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6371 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6372 old_file_storage_dir,
6373 new_file_storage_dir)
6374 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6375 " (but the instance has been renamed in Ganeti)" %
6376 (inst.primary_node, old_file_storage_dir,
6377 new_file_storage_dir))
6379 _StartInstanceDisks(self, inst, None)
6381 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6382 old_name, self.op.debug_level)
6383 msg = result.fail_msg
6385 msg = ("Could not run OS rename script for instance %s on node %s"
6386 " (but the instance has been renamed in Ganeti): %s" %
6387 (inst.name, inst.primary_node, msg))
6388 self.proc.LogWarning(msg)
6390 _ShutdownInstanceDisks(self, inst)
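# Rename ordering (descriptive note): the configuration is renamed first, the
# instance lock is then swapped while the BGL is held, file-based storage
# directories are renamed on the primary node if needed, and finally the OS
# rename script is run with the disks temporarily activated; a failure of the
# OS rename script is only reported as a warning, since the instance has
# already been renamed in Ganeti.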
6395 class LUInstanceRemove(LogicalUnit):
6396 """Remove an instance.
6399 HPATH = "instance-remove"
6400 HTYPE = constants.HTYPE_INSTANCE
6403 def ExpandNames(self):
6404 self._ExpandAndLockInstance()
6405 self.needed_locks[locking.LEVEL_NODE] = []
6406 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6408 def DeclareLocks(self, level):
6409 if level == locking.LEVEL_NODE:
6410 self._LockInstancesNodes()
6412 def BuildHooksEnv(self):
6415 This runs on master, primary and secondary nodes of the instance.
6418 env = _BuildInstanceHookEnvByObject(self, self.instance)
6419 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6422 def BuildHooksNodes(self):
6423 """Build hooks nodes.
6426 nl = [self.cfg.GetMasterNode()]
6427 nl_post = list(self.instance.all_nodes) + nl
6428 return (nl, nl_post)
6430 def CheckPrereq(self):
6431 """Check prerequisites.
6433 This checks that the instance is in the cluster.
6436 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6437 assert self.instance is not None, \
6438 "Cannot retrieve locked instance %s" % self.op.instance_name
6440 def Exec(self, feedback_fn):
6441 """Remove the instance.
6444 instance = self.instance
6445 logging.info("Shutting down instance %s on node %s",
6446 instance.name, instance.primary_node)
6448 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6449 self.op.shutdown_timeout)
6450 msg = result.fail_msg
6452 if self.op.ignore_failures:
6453 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6455 raise errors.OpExecError("Could not shutdown instance %s on"
6457 (instance.name, instance.primary_node, msg))
6459 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6462 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6463 """Utility function to remove an instance.
6466 logging.info("Removing block devices for instance %s", instance.name)
6468 if not _RemoveDisks(lu, instance):
6469 if not ignore_failures:
6470 raise errors.OpExecError("Can't remove instance's disks")
6471 feedback_fn("Warning: can't remove instance's disks")
6473 logging.info("Removing instance %s out of cluster config", instance.name)
6475 lu.cfg.RemoveInstance(instance.name)
6477 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6478 "Instance lock removal conflict"
6480 # Remove lock for the instance
6481 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6484 class LUInstanceQuery(NoHooksLU):
6485 """Logical unit for querying instances.
6488 # pylint: disable-msg=W0142
6491 def CheckArguments(self):
6492 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6493 self.op.output_fields, self.op.use_locking)
6495 def ExpandNames(self):
6496 self.iq.ExpandNames(self)
6498 def DeclareLocks(self, level):
6499 self.iq.DeclareLocks(self, level)
6501 def Exec(self, feedback_fn):
6502 return self.iq.OldStyleQuery(self)
6505 class LUInstanceFailover(LogicalUnit):
6506 """Failover an instance.
6509 HPATH = "instance-failover"
6510 HTYPE = constants.HTYPE_INSTANCE
6513 def CheckArguments(self):
6514 """Check the arguments.
6517 self.iallocator = getattr(self.op, "iallocator", None)
6518 self.target_node = getattr(self.op, "target_node", None)
6520 def ExpandNames(self):
6521 self._ExpandAndLockInstance()
6523 if self.op.target_node is not None:
6524 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6526 self.needed_locks[locking.LEVEL_NODE] = []
6527 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6529 ignore_consistency = self.op.ignore_consistency
6530 shutdown_timeout = self.op.shutdown_timeout
6531 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6534 ignore_consistency=ignore_consistency,
6535 shutdown_timeout=shutdown_timeout)
6536 self.tasklets = [self._migrater]
6538 def DeclareLocks(self, level):
6539 if level == locking.LEVEL_NODE:
6540 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6541 if instance.disk_template in constants.DTS_EXT_MIRROR:
6542 if self.op.target_node is None:
6543 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6545 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6546 self.op.target_node]
6547 del self.recalculate_locks[locking.LEVEL_NODE]
6549 self._LockInstancesNodes()
6551 def BuildHooksEnv(self):
6554 This runs on master, primary and secondary nodes of the instance.
6557 instance = self._migrater.instance
6558 source_node = instance.primary_node
6559 target_node = self.op.target_node
6561 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6562 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6563 "OLD_PRIMARY": source_node,
6564 "NEW_PRIMARY": target_node,
6567 if instance.disk_template in constants.DTS_INT_MIRROR:
6568 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6569 env["NEW_SECONDARY"] = source_node
6571 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6573 env.update(_BuildInstanceHookEnvByObject(self, instance))
6577 def BuildHooksNodes(self):
6578 """Build hooks nodes.
6581 instance = self._migrater.instance
6582 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6583 return (nl, nl + [instance.primary_node])
6586 class LUInstanceMigrate(LogicalUnit):
6587 """Migrate an instance.
6589 This is migration without shutting down, compared to the failover,
6590 which is done with shutdown.
6593 HPATH = "instance-migrate"
6594 HTYPE = constants.HTYPE_INSTANCE
6597 def ExpandNames(self):
6598 self._ExpandAndLockInstance()
6600 if self.op.target_node is not None:
6601 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6603 self.needed_locks[locking.LEVEL_NODE] = []
6604 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6606 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6607 cleanup=self.op.cleanup,
6609 fallback=self.op.allow_failover)
6610 self.tasklets = [self._migrater]
6612 def DeclareLocks(self, level):
6613 if level == locking.LEVEL_NODE:
6614 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6615 if instance.disk_template in constants.DTS_EXT_MIRROR:
6616 if self.op.target_node is None:
6617 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6619 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6620 self.op.target_node]
6621 del self.recalculate_locks[locking.LEVEL_NODE]
6623 self._LockInstancesNodes()
6625 def BuildHooksEnv(self):
6628 This runs on master, primary and secondary nodes of the instance.
6631 instance = self._migrater.instance
6632 source_node = instance.primary_node
6633 target_node = self.op.target_node
6634 env = _BuildInstanceHookEnvByObject(self, instance)
6636 "MIGRATE_LIVE": self._migrater.live,
6637 "MIGRATE_CLEANUP": self.op.cleanup,
6638 "OLD_PRIMARY": source_node,
6639 "NEW_PRIMARY": target_node,
6642 if instance.disk_template in constants.DTS_INT_MIRROR:
6643 env["OLD_SECONDARY"] = target_node
6644 env["NEW_SECONDARY"] = source_node
6646 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6650 def BuildHooksNodes(self):
6651 """Build hooks nodes.
6654 instance = self._migrater.instance
6655 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6656 return (nl, nl + [instance.primary_node])
6659 class LUInstanceMove(LogicalUnit):
6660 """Move an instance by data-copying.
6663 HPATH = "instance-move"
6664 HTYPE = constants.HTYPE_INSTANCE
6667 def ExpandNames(self):
6668 self._ExpandAndLockInstance()
6669 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6670 self.op.target_node = target_node
6671 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6672 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6674 def DeclareLocks(self, level):
6675 if level == locking.LEVEL_NODE:
6676 self._LockInstancesNodes(primary_only=True)
6678 def BuildHooksEnv(self):
6681 This runs on master, primary and secondary nodes of the instance.
6685 "TARGET_NODE": self.op.target_node,
6686 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6688 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6691 def BuildHooksNodes(self):
6692 """Build hooks nodes.
6696 self.cfg.GetMasterNode(),
6697 self.instance.primary_node,
6698 self.op.target_node,
6702 def CheckPrereq(self):
6703 """Check prerequisites.
6705 This checks that the instance is in the cluster.
6708 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6709 assert self.instance is not None, \
6710 "Cannot retrieve locked instance %s" % self.op.instance_name
6712 node = self.cfg.GetNodeInfo(self.op.target_node)
6713 assert node is not None, \
6714 "Cannot retrieve locked node %s" % self.op.target_node
6716 self.target_node = target_node = node.name
6718 if target_node == instance.primary_node:
6719 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6720 (instance.name, target_node),
6723 bep = self.cfg.GetClusterInfo().FillBE(instance)
6725 for idx, dsk in enumerate(instance.disks):
6726 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6727 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6728 " cannot copy" % idx, errors.ECODE_STATE)
6730 _CheckNodeOnline(self, target_node)
6731 _CheckNodeNotDrained(self, target_node)
6732 _CheckNodeVmCapable(self, target_node)
6734 if instance.admin_up:
6735 # check memory requirements on the secondary node
6736 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6737 instance.name, bep[constants.BE_MEMORY],
6738 instance.hypervisor)
6740 self.LogInfo("Not checking memory on the secondary node as"
6741 " instance will not be started")
6743 # check bridge existence
6744 _CheckInstanceBridgesExist(self, instance, node=target_node)
6746 def Exec(self, feedback_fn):
6747 """Move an instance.
6749 The move is done by shutting it down on its present node, copying
6750 the data over (slow) and starting it on the new node.
6753 instance = self.instance
6755 source_node = instance.primary_node
6756 target_node = self.target_node
6758 self.LogInfo("Shutting down instance %s on source node %s",
6759 instance.name, source_node)
6761 result = self.rpc.call_instance_shutdown(source_node, instance,
6762 self.op.shutdown_timeout)
6763 msg = result.fail_msg
6765 if self.op.ignore_consistency:
6766 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6767 " Proceeding anyway. Please make sure node"
6768 " %s is down. Error details: %s",
6769 instance.name, source_node, source_node, msg)
6771 raise errors.OpExecError("Could not shutdown instance %s on"
6773 (instance.name, source_node, msg))
6775 # create the target disks
6777 _CreateDisks(self, instance, target_node=target_node)
6778 except errors.OpExecError:
6779 self.LogWarning("Device creation failed, reverting...")
6781 _RemoveDisks(self, instance, target_node=target_node)
6783 self.cfg.ReleaseDRBDMinors(instance.name)
6786 cluster_name = self.cfg.GetClusterInfo().cluster_name
6789 # activate, get path, copy the data over
6790 for idx, disk in enumerate(instance.disks):
6791 self.LogInfo("Copying data for disk %d", idx)
6792 result = self.rpc.call_blockdev_assemble(target_node, disk,
6793 instance.name, True, idx)
6795 self.LogWarning("Can't assemble newly created disk %d: %s",
6796 idx, result.fail_msg)
6797 errs.append(result.fail_msg)
6799 dev_path = result.payload
6800 result = self.rpc.call_blockdev_export(source_node, disk,
6801 target_node, dev_path,
6804 self.LogWarning("Can't copy data over for disk %d: %s",
6805 idx, result.fail_msg)
6806 errs.append(result.fail_msg)
6810 self.LogWarning("Some disks failed to copy, aborting")
6812 _RemoveDisks(self, instance, target_node=target_node)
6814 self.cfg.ReleaseDRBDMinors(instance.name)
6815 raise errors.OpExecError("Errors during disk copy: %s" %
6818 instance.primary_node = target_node
6819 self.cfg.Update(instance, feedback_fn)
6821 self.LogInfo("Removing the disks on the original node")
6822 _RemoveDisks(self, instance, target_node=source_node)
6824 # Only start the instance if it's marked as up
6825 if instance.admin_up:
6826 self.LogInfo("Starting instance %s on node %s",
6827 instance.name, target_node)
6829 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6830 ignore_secondaries=True)
6832 _ShutdownInstanceDisks(self, instance)
6833 raise errors.OpExecError("Can't activate the instance's disks")
6835 result = self.rpc.call_instance_start(target_node, instance,
6837 msg = result.fail_msg
6839 _ShutdownInstanceDisks(self, instance)
6840 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6841 (instance.name, target_node, msg))
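# Move flow (descriptive summary): the instance is shut down on the source
# node, new disks are created on the target, each disk is assembled there and
# filled via call_blockdev_export from the source, the source disks are then
# removed, the primary node is switched in the configuration and, if the
# instance was marked up, it is started on the target node.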
6844 class LUNodeMigrate(LogicalUnit):
6845 """Migrate all instances from a node.
6848 HPATH = "node-migrate"
6849 HTYPE = constants.HTYPE_NODE
6852 def CheckArguments(self):
6855 def ExpandNames(self):
6856 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6858 self.share_locks = _ShareAll()
6859 self.needed_locks = {
6860 locking.LEVEL_NODE: [self.op.node_name],
6863 def BuildHooksEnv(self):
6866 This runs on the master, the primary and all the secondaries.
6870 "NODE_NAME": self.op.node_name,
6873 def BuildHooksNodes(self):
6874 """Build hooks nodes.
6877 nl = [self.cfg.GetMasterNode()]
6880 def CheckPrereq(self):
6883 def Exec(self, feedback_fn):
6884 # Prepare jobs for migrating instances
6886 [opcodes.OpInstanceMigrate(instance_name=inst.name,
6889 iallocator=self.op.iallocator,
6890 target_node=self.op.target_node)]
6891 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
6894 # TODO: Run iallocator in this opcode and pass correct placement options to
6895 # OpInstanceMigrate. Since other jobs can modify the cluster between
6896 # running the iallocator and the actual migration, a good consistency model
6897 # will have to be found.
6899 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
6900 frozenset([self.op.node_name]))
6902 return ResultWithJobs(jobs)
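# Descriptive note: one single-opcode job (OpInstanceMigrate) is built per
# primary instance of the node, forwarding the LU's iallocator/target_node
# settings, and the whole list is handed back through ResultWithJobs.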
6905 class TLMigrateInstance(Tasklet):
6906 """Tasklet class for instance migration.
6909 @ivar live: whether the migration will be done live or non-live;
6910 this variable is initialized only after CheckPrereq has run
6911 @type cleanup: boolean
6912 @ivar cleanup: Whether we clean up from a failed migration
6913 @type iallocator: string
6914 @ivar iallocator: The iallocator used to determine target_node
6915 @type target_node: string
6916 @ivar target_node: If given, the target_node to reallocate the instance to
6917 @type failover: boolean
6918 @ivar failover: Whether operation results in failover or migration
6919 @type fallback: boolean
6920 @ivar fallback: Whether fallback to failover is allowed if migration is not
6921 possible
6922 @type ignore_consistency: boolean
6923 @ivar ignore_consistency: Whether we should ignore consistency between source
6924 and target node
6925 @type shutdown_timeout: int
6926 @ivar shutdown_timeout: In case of failover, the timeout to use for the shutdown
6929 def __init__(self, lu, instance_name, cleanup=False,
6930 failover=False, fallback=False,
6931 ignore_consistency=False,
6932 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6933 """Initializes this class.
6936 Tasklet.__init__(self, lu)
6939 self.instance_name = instance_name
6940 self.cleanup = cleanup
6941 self.live = False # will be overridden later
6942 self.failover = failover
6943 self.fallback = fallback
6944 self.ignore_consistency = ignore_consistency
6945 self.shutdown_timeout = shutdown_timeout
6947 def CheckPrereq(self):
6948 """Check prerequisites.
6950 This checks that the instance is in the cluster.
6953 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6954 instance = self.cfg.GetInstanceInfo(instance_name)
6955 assert instance is not None
6956 self.instance = instance
6958 if (not self.cleanup and not instance.admin_up and not self.failover and
6960 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6962 self.failover = True
6964 if instance.disk_template not in constants.DTS_MIRRORED:
6969 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6970 " %s" % (instance.disk_template, text),
6973 if instance.disk_template in constants.DTS_EXT_MIRROR:
6974 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6976 if self.lu.op.iallocator:
6977 self._RunAllocator()
6979 # We set self.target_node as it is required by
6980 # BuildHooksEnv
6981 self.target_node = self.lu.op.target_node
6983 # self.target_node is already populated, either directly or by the
6985 target_node = self.target_node
6986 if self.target_node == instance.primary_node:
6987 raise errors.OpPrereqError("Cannot migrate instance %s"
6988 " to its primary (%s)" %
6989 (instance.name, instance.primary_node))
6991 if len(self.lu.tasklets) == 1:
6992 # It is safe to release locks only when we're the only tasklet
6994 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
6995 keep=[instance.primary_node, self.target_node])
6998 secondary_nodes = instance.secondary_nodes
6999 if not secondary_nodes:
7000 raise errors.ConfigurationError("No secondary node but using"
7001 " %s disk template" %
7002 instance.disk_template)
7003 target_node = secondary_nodes[0]
7004 if self.lu.op.iallocator or (self.lu.op.target_node and
7005 self.lu.op.target_node != target_node):
7007 text = "failed over"
7010 raise errors.OpPrereqError("Instances with disk template %s cannot"
7011 " be %s to arbitrary nodes"
7012 " (neither an iallocator nor a target"
7013 " node can be passed)" %
7014 (instance.disk_template, text),
7017 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7019 # check memory requirements on the secondary node
7020 if not self.failover or instance.admin_up:
7021 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7022 instance.name, i_be[constants.BE_MEMORY],
7023 instance.hypervisor)
7025 self.lu.LogInfo("Not checking memory on the secondary node as"
7026 " instance will not be started")
7028 # check bridge existence
7029 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7031 if not self.cleanup:
7032 _CheckNodeNotDrained(self.lu, target_node)
7033 if not self.failover:
7034 result = self.rpc.call_instance_migratable(instance.primary_node,
7036 if result.fail_msg and self.fallback:
7037 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7039 self.failover = True
7041 result.Raise("Can't migrate, please use failover",
7042 prereq=True, ecode=errors.ECODE_STATE)
7044 assert not (self.failover and self.cleanup)
7046 if not self.failover:
7047 if self.lu.op.live is not None and self.lu.op.mode is not None:
7048 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7049 " parameters are accepted",
7051 if self.lu.op.live is not None:
7053 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7055 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7056 # reset the 'live' parameter to None so that repeated
7057 # invocations of CheckPrereq do not raise an exception
7058 self.lu.op.live = None
7059 elif self.lu.op.mode is None:
7060 # read the default value from the hypervisor
7061 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7063 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7065 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7067 # Failover is never live
7070 def _RunAllocator(self):
7071 """Run the allocator based on input opcode.
7074 ial = IAllocator(self.cfg, self.rpc,
7075 mode=constants.IALLOCATOR_MODE_RELOC,
7076 name=self.instance_name,
7077 # TODO See why hail breaks with a single node below
7078 relocate_from=[self.instance.primary_node,
7079 self.instance.primary_node],
7082 ial.Run(self.lu.op.iallocator)
7085 raise errors.OpPrereqError("Can't compute nodes using"
7086 " iallocator '%s': %s" %
7087 (self.lu.op.iallocator, ial.info),
7089 if len(ial.result) != ial.required_nodes:
7090 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7091 " of nodes (%s), required %s" %
7092 (self.lu.op.iallocator, len(ial.result),
7093 ial.required_nodes), errors.ECODE_FAULT)
7094 self.target_node = ial.result[0]
7095 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7096 self.instance_name, self.lu.op.iallocator,
7097 utils.CommaJoin(ial.result))
7099 def _WaitUntilSync(self):
7100 """Poll with custom rpc for disk sync.
7102 This uses our own step-based rpc call.
7105 self.feedback_fn("* wait until resync is done")
7109 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7111 self.instance.disks)
7113 for node, nres in result.items():
7114 nres.Raise("Cannot resync disks on node %s" % node)
7115 node_done, node_percent = nres.payload
7116 all_done = all_done and node_done
7117 if node_percent is not None:
7118 min_percent = min(min_percent, node_percent)
7120 if min_percent < 100:
7121 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7124 def _EnsureSecondary(self, node):
7125 """Demote a node to secondary.
7128 self.feedback_fn("* switching node %s to secondary mode" % node)
7130 for dev in self.instance.disks:
7131 self.cfg.SetDiskID(dev, node)
7133 result = self.rpc.call_blockdev_close(node, self.instance.name,
7134 self.instance.disks)
7135 result.Raise("Cannot change disk to secondary on node %s" % node)
7137 def _GoStandalone(self):
7138 """Disconnect from the network.
7141 self.feedback_fn("* changing into standalone mode")
7142 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7143 self.instance.disks)
7144 for node, nres in result.items():
7145 nres.Raise("Cannot disconnect disks node %s" % node)
7147 def _GoReconnect(self, multimaster):
7148 """Reconnect to the network.
7154 msg = "single-master"
7155 self.feedback_fn("* changing disks into %s mode" % msg)
7156 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7157 self.instance.disks,
7158 self.instance.name, multimaster)
7159 for node, nres in result.items():
7160 nres.Raise("Cannot change disks config on node %s" % node)
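# Typical sequence (descriptive note): during a DRBD migration the helpers
# above are chained as _EnsureSecondary(target_node) -> _GoStandalone() ->
# _GoReconnect(True) -> _WaitUntilSync() to reach dual-master mode, and the
# switch back to single-master uses _GoReconnect(False); see _ExecMigration
# and _ExecCleanup below.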
7162 def _ExecCleanup(self):
7163 """Try to cleanup after a failed migration.
7165 The cleanup is done by:
7166 - check that the instance is running only on one node
7167 (and update the config if needed)
7168 - change disks on its secondary node to secondary
7169 - wait until disks are fully synchronized
7170 - disconnect from the network
7171 - change disks into single-master mode
7172 - wait again until disks are fully synchronized
7175 instance = self.instance
7176 target_node = self.target_node
7177 source_node = self.source_node
7179 # check running on only one node
7180 self.feedback_fn("* checking where the instance actually runs"
7181 " (if this hangs, the hypervisor might be in"
7182 " a bad state)")
7183 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7184 for node, result in ins_l.items():
7185 result.Raise("Can't contact node %s" % node)
7187 runningon_source = instance.name in ins_l[source_node].payload
7188 runningon_target = instance.name in ins_l[target_node].payload
7190 if runningon_source and runningon_target:
7191 raise errors.OpExecError("Instance seems to be running on two nodes,"
7192 " or the hypervisor is confused; you will have"
7193 " to ensure manually that it runs only on one"
7194 " and restart this operation")
7196 if not (runningon_source or runningon_target):
7197 raise errors.OpExecError("Instance does not seem to be running at all;"
7198 " in this case it's safer to repair by"
7199 " running 'gnt-instance stop' to ensure disk"
7200 " shutdown, and then restarting it")
7202 if runningon_target:
7203 # the migration has actually succeeded, we need to update the config
7204 self.feedback_fn("* instance running on secondary node (%s),"
7205 " updating config" % target_node)
7206 instance.primary_node = target_node
7207 self.cfg.Update(instance, self.feedback_fn)
7208 demoted_node = source_node
7210 self.feedback_fn("* instance confirmed to be running on its"
7211 " primary node (%s)" % source_node)
7212 demoted_node = target_node
7214 if instance.disk_template in constants.DTS_INT_MIRROR:
7215 self._EnsureSecondary(demoted_node)
7217 self._WaitUntilSync()
7218 except errors.OpExecError:
7219 # we ignore here errors, since if the device is standalone, it
7220 # won't be able to sync
7222 self._GoStandalone()
7223 self._GoReconnect(False)
7224 self._WaitUntilSync()
7226 self.feedback_fn("* done")
7228 def _RevertDiskStatus(self):
7229 """Try to revert the disk status after a failed migration.
7232 target_node = self.target_node
7233 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7237 self._EnsureSecondary(target_node)
7238 self._GoStandalone()
7239 self._GoReconnect(False)
7240 self._WaitUntilSync()
7241 except errors.OpExecError, err:
7242 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7243 " please try to recover the instance manually;"
7244 " error '%s'" % str(err))
7246 def _AbortMigration(self):
7247 """Call the hypervisor code to abort a started migration.
7250 instance = self.instance
7251 target_node = self.target_node
7252 migration_info = self.migration_info
7254 abort_result = self.rpc.call_finalize_migration(target_node,
7258 abort_msg = abort_result.fail_msg
7260 logging.error("Aborting migration failed on target node %s: %s",
7261 target_node, abort_msg)
7262 # Don't raise an exception here, as we still have to try to revert the
7263 # disk status, even if this step failed.
7265 def _ExecMigration(self):
7266 """Migrate an instance.
7268 The migration is done by:
7269 - change the disks into dual-master mode
7270 - wait until disks are fully synchronized again
7271 - migrate the instance
7272 - change disks on the new secondary node (the old primary) to secondary
7273 - wait until disks are fully synchronized
7274 - change disks into single-master mode
7277 instance = self.instance
7278 target_node = self.target_node
7279 source_node = self.source_node
7281 self.feedback_fn("* checking disk consistency between source and target")
7282 for dev in instance.disks:
7283 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7284 raise errors.OpExecError("Disk %s is degraded or not fully"
7285 " synchronized on target node,"
7286 " aborting migration" % dev.iv_name)
7288 # First get the migration information from the remote node
7289 result = self.rpc.call_migration_info(source_node, instance)
7290 msg = result.fail_msg
7292 log_err = ("Failed fetching source migration information from %s: %s" %
7294 logging.error(log_err)
7295 raise errors.OpExecError(log_err)
7297 self.migration_info = migration_info = result.payload
7299 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7300 # Then switch the disks to master/master mode
7301 self._EnsureSecondary(target_node)
7302 self._GoStandalone()
7303 self._GoReconnect(True)
7304 self._WaitUntilSync()
7306 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7307 result = self.rpc.call_accept_instance(target_node,
7310 self.nodes_ip[target_node])
7312 msg = result.fail_msg
7314 logging.error("Instance pre-migration failed, trying to revert"
7315 " disk status: %s", msg)
7316 self.feedback_fn("Pre-migration failed, aborting")
7317 self._AbortMigration()
7318 self._RevertDiskStatus()
7319 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7320 (instance.name, msg))
7322 self.feedback_fn("* migrating instance to %s" % target_node)
7323 result = self.rpc.call_instance_migrate(source_node, instance,
7324 self.nodes_ip[target_node],
7326 msg = result.fail_msg
7328 logging.error("Instance migration failed, trying to revert"
7329 " disk status: %s", msg)
7330 self.feedback_fn("Migration failed, aborting")
7331 self._AbortMigration()
7332 self._RevertDiskStatus()
7333 raise errors.OpExecError("Could not migrate instance %s: %s" %
7334 (instance.name, msg))
7336 instance.primary_node = target_node
7337 # distribute new instance config to the other nodes
7338 self.cfg.Update(instance, self.feedback_fn)
7340 result = self.rpc.call_finalize_migration(target_node,
7344 msg = result.fail_msg
7346 logging.error("Instance migration succeeded, but finalization failed:"
7348 raise errors.OpExecError("Could not finalize instance migration: %s" %
7351 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7352 self._EnsureSecondary(source_node)
7353 self._WaitUntilSync()
7354 self._GoStandalone()
7355 self._GoReconnect(False)
7356 self._WaitUntilSync()
7358 self.feedback_fn("* done")
7360 def _ExecFailover(self):
7361 """Failover an instance.
7363 The failover is done by shutting it down on its present node and
7364 starting it on the secondary.
7367 instance = self.instance
7368 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7370 source_node = instance.primary_node
7371 target_node = self.target_node
7373 if instance.admin_up:
7374 self.feedback_fn("* checking disk consistency between source and target")
7375 for dev in instance.disks:
7376 # for drbd, these are drbd over lvm
7377 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7378 if primary_node.offline:
7379 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7380 " target node %s" %
7381 (primary_node.name, dev.iv_name, target_node))
7382 elif not self.ignore_consistency:
7383 raise errors.OpExecError("Disk %s is degraded on target node,"
7384 " aborting failover" % dev.iv_name)
7386 self.feedback_fn("* not checking disk consistency as instance is not"
7387 " running")
7389 self.feedback_fn("* shutting down instance on source node")
7390 logging.info("Shutting down instance %s on node %s",
7391 instance.name, source_node)
7393 result = self.rpc.call_instance_shutdown(source_node, instance,
7394 self.shutdown_timeout)
7395 msg = result.fail_msg
7397 if self.ignore_consistency or primary_node.offline:
7398 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7399 " proceeding anyway; please make sure node"
7400 " %s is down; error details: %s",
7401 instance.name, source_node, source_node, msg)
7403 raise errors.OpExecError("Could not shutdown instance %s on"
7405 (instance.name, source_node, msg))
7407 self.feedback_fn("* deactivating the instance's disks on source node")
7408 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7409 raise errors.OpExecError("Can't shut down the instance's disks")
7411 instance.primary_node = target_node
7412 # distribute new instance config to the other nodes
7413 self.cfg.Update(instance, self.feedback_fn)
7415 # Only start the instance if it's marked as up
7416 if instance.admin_up:
7417 self.feedback_fn("* activating the instance's disks on target node %s" %
7419 logging.info("Starting instance %s on node %s",
7420 instance.name, target_node)
7422 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7423 ignore_secondaries=True)
7425 _ShutdownInstanceDisks(self.lu, instance)
7426 raise errors.OpExecError("Can't activate the instance's disks")
7428 self.feedback_fn("* starting the instance on the target node %s" %
7430 result = self.rpc.call_instance_start(target_node, instance, None, None,
7432 msg = result.fail_msg
7434 _ShutdownInstanceDisks(self.lu, instance)
7435 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7436 (instance.name, target_node, msg))
7438 def Exec(self, feedback_fn):
7439 """Perform the migration.
7442 self.feedback_fn = feedback_fn
7443 self.source_node = self.instance.primary_node
7445 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7446 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7447 self.target_node = self.instance.secondary_nodes[0]
7448 # Otherwise self.target_node has been populated either
7449 # directly, or through an iallocator.
7451 self.all_nodes = [self.source_node, self.target_node]
7452 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7453 in self.cfg.GetMultiNodeInfo(self.all_nodes))
7456 feedback_fn("Failover instance %s" % self.instance.name)
7457 self._ExecFailover()
7459 feedback_fn("Migrating instance %s" % self.instance.name)
7462 return self._ExecCleanup()
7464 return self._ExecMigration()
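# Dispatch (descriptive note): Exec selects one of three paths - _ExecFailover
# when self.failover is set, _ExecCleanup when recovering from a previously
# failed migration (self.cleanup), and _ExecMigration for a regular live or
# non-live migration.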
7467 def _CreateBlockDev(lu, node, instance, device, force_create,
7469 """Create a tree of block devices on a given node.
7471 If this device type has to be created on secondaries, create it and
7472 all its children.
7474 If not, just recurse to children keeping the same 'force' value.
7476 @param lu: the lu on whose behalf we execute
7477 @param node: the node on which to create the device
7478 @type instance: L{objects.Instance}
7479 @param instance: the instance which owns the device
7480 @type device: L{objects.Disk}
7481 @param device: the device to create
7482 @type force_create: boolean
7483 @param force_create: whether to force creation of this device; this
7484 will be changed to True whenever we find a device which has the
7485 CreateOnSecondary() attribute
7486 @param info: the extra 'metadata' we should attach to the device
7487 (this will be represented as a LVM tag)
7488 @type force_open: boolean
7489 @param force_open: this parameter will be passed to the
7490 L{backend.BlockdevCreate} function where it specifies
7491 whether we run on primary or not, and it affects both
7492 the child assembly and the device's own Open() execution
7495 if device.CreateOnSecondary():
7499 for child in device.children:
7500 _CreateBlockDev(lu, node, instance, child, force_create,
7503 if not force_create:
7506 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
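# Recursion note (descriptive): force_create is propagated to the children of
# a device and switched to True as soon as a device reports
# CreateOnSecondary(), so an entire sub-tree ends up being created on this
# node once any ancestor requires it; devices for which force_create stays
# False are skipped here.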
7509 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7510 """Create a single block device on a given node.
7512 This will not recurse over children of the device, so they must be
7513 created in advance.
7515 @param lu: the lu on whose behalf we execute
7516 @param node: the node on which to create the device
7517 @type instance: L{objects.Instance}
7518 @param instance: the instance which owns the device
7519 @type device: L{objects.Disk}
7520 @param device: the device to create
7521 @param info: the extra 'metadata' we should attach to the device
7522 (this will be represented as a LVM tag)
7523 @type force_open: boolean
7524 @param force_open: this parameter will be passed to the
7525 L{backend.BlockdevCreate} function where it specifies
7526 whether we run on primary or not, and it affects both
7527 the child assembly and the device's own Open() execution
7530 lu.cfg.SetDiskID(device, node)
7531 result = lu.rpc.call_blockdev_create(node, device, device.size,
7532 instance.name, force_open, info)
7533 result.Raise("Can't create block device %s on"
7534 " node %s for instance %s" % (device, node, instance.name))
7535 if device.physical_id is None:
7536 device.physical_id = result.payload
7539 def _GenerateUniqueNames(lu, exts):
7540 """Generate a suitable LV name.
7542 This will generate a logical volume name for the given instance.
7547 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7548 results.append("%s%s" % (new_id, val))
7552 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7553 iv_name, p_minor, s_minor):
7554 """Generate a drbd8 device complete with its children.
7557 assert len(vgnames) == len(names) == 2
7558 port = lu.cfg.AllocatePort()
7559 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7560 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7561 logical_id=(vgnames[0], names[0]))
7562 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7563 logical_id=(vgnames[1], names[1]))
7564 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7565 logical_id=(primary, secondary, port,
7568 children=[dev_data, dev_meta],
7573 def _GenerateDiskTemplate(lu, template_name,
7574 instance_name, primary_node,
7575 secondary_nodes, disk_info,
7576 file_storage_dir, file_driver,
7577 base_index, feedback_fn):
7578 """Generate the entire disk layout for a given template type.
7581 #TODO: compute space requirements
7583 vgname = lu.cfg.GetVGName()
7584 disk_count = len(disk_info)
7586 if template_name == constants.DT_DISKLESS:
7588 elif template_name == constants.DT_PLAIN:
7589 if len(secondary_nodes) != 0:
7590 raise errors.ProgrammerError("Wrong template configuration")
7592 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7593 for i in range(disk_count)])
7594 for idx, disk in enumerate(disk_info):
7595 disk_index = idx + base_index
7596 vg = disk.get(constants.IDISK_VG, vgname)
7597 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7598 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7599 size=disk[constants.IDISK_SIZE],
7600 logical_id=(vg, names[idx]),
7601 iv_name="disk/%d" % disk_index,
7602 mode=disk[constants.IDISK_MODE])
7603 disks.append(disk_dev)
7604 elif template_name == constants.DT_DRBD8:
7605 if len(secondary_nodes) != 1:
7606 raise errors.ProgrammerError("Wrong template configuration")
7607 remote_node = secondary_nodes[0]
7608 minors = lu.cfg.AllocateDRBDMinor(
7609 [primary_node, remote_node] * len(disk_info), instance_name)
7612 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7613 for i in range(disk_count)]):
7614 names.append(lv_prefix + "_data")
7615 names.append(lv_prefix + "_meta")
7616 for idx, disk in enumerate(disk_info):
7617 disk_index = idx + base_index
7618 data_vg = disk.get(constants.IDISK_VG, vgname)
7619 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7620 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7621 disk[constants.IDISK_SIZE],
7623 names[idx * 2:idx * 2 + 2],
7624 "disk/%d" % disk_index,
7625 minors[idx * 2], minors[idx * 2 + 1])
7626 disk_dev.mode = disk[constants.IDISK_MODE]
7627 disks.append(disk_dev)
7628 elif template_name == constants.DT_FILE:
7629 if len(secondary_nodes) != 0:
7630 raise errors.ProgrammerError("Wrong template configuration")
7632 opcodes.RequireFileStorage()
7634 for idx, disk in enumerate(disk_info):
7635 disk_index = idx + base_index
7636 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7637 size=disk[constants.IDISK_SIZE],
7638 iv_name="disk/%d" % disk_index,
7639 logical_id=(file_driver,
7640 "%s/disk%d" % (file_storage_dir,
7642 mode=disk[constants.IDISK_MODE])
7643 disks.append(disk_dev)
7644 elif template_name == constants.DT_SHARED_FILE:
7645 if len(secondary_nodes) != 0:
7646 raise errors.ProgrammerError("Wrong template configuration")
7648 opcodes.RequireSharedFileStorage()
7650 for idx, disk in enumerate(disk_info):
7651 disk_index = idx + base_index
7652 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7653 size=disk[constants.IDISK_SIZE],
7654 iv_name="disk/%d" % disk_index,
7655 logical_id=(file_driver,
7656 "%s/disk%d" % (file_storage_dir,
7658 mode=disk[constants.IDISK_MODE])
7659 disks.append(disk_dev)
7660 elif template_name == constants.DT_BLOCK:
7661 if len(secondary_nodes) != 0:
7662 raise errors.ProgrammerError("Wrong template configuration")
7664 for idx, disk in enumerate(disk_info):
7665 disk_index = idx + base_index
7666 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7667 size=disk[constants.IDISK_SIZE],
7668 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7669 disk[constants.IDISK_ADOPT]),
7670 iv_name="disk/%d" % disk_index,
7671 mode=disk[constants.IDISK_MODE])
7672 disks.append(disk_dev)
7675 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
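# Editor's note -- an illustrative sketch of the DRBD8 branch above, not part
# of the original module; the UUIDs and counts are hypothetical. For a
# two-disk DRBD8 instance with base_index 0, _GenerateUniqueNames yields two
# prefixes such as "<uuid1>.disk0" and "<uuid2>.disk1", which are expanded to
#   <uuid1>.disk0_data, <uuid1>.disk0_meta,
#   <uuid2>.disk1_data, <uuid2>.disk1_meta
# and AllocateDRBDMinor is asked for four minors (a primary/secondary pair per
# disk), consumed as minors[idx * 2] and minors[idx * 2 + 1] in each
# _GenerateDRBD8Branch call.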
7679 def _GetInstanceInfoText(instance):
7680 """Compute that text that should be added to the disk's metadata.
7683 return "originstname+%s" % instance.name
7686 def _CalcEta(time_taken, written, total_size):
7687 """Calculates the ETA based on size written and total size.
7689 @param time_taken: The time taken so far
7690 @param written: amount written so far
7691 @param total_size: The total size of data to be written
7692 @return: The remaining time in seconds
7695 avg_time = time_taken / float(written)
7696 return (total_size - written) * avg_time
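# Editor's note -- a worked example of the linear extrapolation above (the
# numbers are hypothetical): if 256 MiB of a 1024 MiB disk were written in
# 30 seconds, then
#   avg_time = 30 / float(256)           # 0.1171875 seconds per MiB
#   (1024 - 256) * avg_time              # 90.0 seconds remaining
# so _CalcEta(30, 256, 1024) == 90.0.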
7699 def _WipeDisks(lu, instance):
7700 """Wipes instance disks.
7702 @type lu: L{LogicalUnit}
7703 @param lu: the logical unit on whose behalf we execute
7704 @type instance: L{objects.Instance}
7705 @param instance: the instance whose disks we should wipe
7706 @return: the success of the wipe
7709 node = instance.primary_node
7711 for device in instance.disks:
7712 lu.cfg.SetDiskID(device, node)
7714 logging.info("Pause sync of instance %s disks", instance.name)
7715 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7717 for idx, success in enumerate(result.payload):
7719 logging.warn("pause-sync of instance %s for disks %d failed",
7723 for idx, device in enumerate(instance.disks):
7724 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk, but
7725 # at most MAX_WIPE_CHUNK
7726 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7727 constants.MIN_WIPE_CHUNK_PERCENT)
7728 # we _must_ make this an int, otherwise rounding errors will
7730 wipe_chunk_size = int(wipe_chunk_size)
7732 lu.LogInfo("* Wiping disk %d", idx)
7733 logging.info("Wiping disk %d for instance %s, node %s using"
7734 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7739 start_time = time.time()
7741 while offset < size:
7742 wipe_size = min(wipe_chunk_size, size - offset)
7743 logging.debug("Wiping disk %d, offset %s, chunk %s",
7744 idx, offset, wipe_size)
7745 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7746 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7747 (idx, offset, wipe_size))
7750 if now - last_output >= 60:
7751 eta = _CalcEta(now - start_time, offset, size)
7752 lu.LogInfo(" - done: %.1f%% ETA: %s" %
7753 (offset / float(size) * 100, utils.FormatSeconds(eta)))
7756 logging.info("Resume sync of instance %s disks", instance.name)
7758 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7760 for idx, success in enumerate(result.payload):
7762 lu.LogWarning("Resume sync of disk %d failed, please have a"
7763 " look at the status and troubleshoot the issue", idx)
7764 logging.warn("resume-sync of instance %s for disks %d failed",
7768 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7769 """Create all disks for an instance.
7771 This abstracts away some work from AddInstance.
7773 @type lu: L{LogicalUnit}
7774 @param lu: the logical unit on whose behalf we execute
7775 @type instance: L{objects.Instance}
7776 @param instance: the instance whose disks we should create
7778 @param to_skip: list of indices to skip
7779 @type target_node: string
7780 @param target_node: if passed, overrides the target node for creation
7782 @return: the success of the creation
7785 info = _GetInstanceInfoText(instance)
7786 if target_node is None:
7787 pnode = instance.primary_node
7788 all_nodes = instance.all_nodes
7793 if instance.disk_template in constants.DTS_FILEBASED:
7794 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7795 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7797 result.Raise("Failed to create directory '%s' on"
7798 " node %s" % (file_storage_dir, pnode))
7800 # Note: this needs to be kept in sync with adding of disks in
7801 # LUInstanceSetParams
7802 for idx, device in enumerate(instance.disks):
7803 if to_skip and idx in to_skip:
7805 logging.info("Creating volume %s for instance %s",
7806 device.iv_name, instance.name)
7808 for node in all_nodes:
7809 f_create = node == pnode
7810 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7813 def _RemoveDisks(lu, instance, target_node=None):
7814 """Remove all disks for an instance.
7816 This abstracts away some work from `AddInstance()` and
7817 `RemoveInstance()`. Note that in case some of the devices couldn't
7818 be removed, the removal will continue with the other ones (compare
7819 with `_CreateDisks()`).
7821 @type lu: L{LogicalUnit}
7822 @param lu: the logical unit on whose behalf we execute
7823 @type instance: L{objects.Instance}
7824 @param instance: the instance whose disks we should remove
7825 @type target_node: string
7826 @param target_node: used to override the node on which to remove the disks
7828 @return: the success of the removal
7831 logging.info("Removing block devices for instance %s", instance.name)
7834 for device in instance.disks:
7836 edata = [(target_node, device)]
7838 edata = device.ComputeNodeTree(instance.primary_node)
7839 for node, disk in edata:
7840 lu.cfg.SetDiskID(disk, node)
7841 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7843 lu.LogWarning("Could not remove block device %s on node %s,"
7844 " continuing anyway: %s", device.iv_name, node, msg)
7847 if instance.disk_template == constants.DT_FILE:
7848 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7852 tgt = instance.primary_node
7853 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7855 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7856 file_storage_dir, instance.primary_node, result.fail_msg)
7862 def _ComputeDiskSizePerVG(disk_template, disks):
7863 """Compute disk size requirements in the volume group
7866 def _compute(disks, payload):
7867 """Universal algorithm.
7872 vgs[disk[constants.IDISK_VG]] = \
7873 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
7877 # Required free disk space as a function of disk and swap space
7879 constants.DT_DISKLESS: {},
7880 constants.DT_PLAIN: _compute(disks, 0),
7881 # 128 MB are added for drbd metadata for each disk
7882 constants.DT_DRBD8: _compute(disks, 128),
7883 constants.DT_FILE: {},
7884 constants.DT_SHARED_FILE: {},
7887 if disk_template not in req_size_dict:
7888 raise errors.ProgrammerError("Disk template '%s' size requirement"
7889 " is unknown" % disk_template)
7891 return req_size_dict[disk_template]
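# Editor's example for the helper above (the VG names are hypothetical): with
#   disks = [{constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 1024},
#            {constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 512},
#            {constants.IDISK_VG: "fastvg", constants.IDISK_SIZE: 256}]
# _ComputeDiskSizePerVG(constants.DT_DRBD8, disks) accumulates per volume
# group and returns {"xenvg": 1792, "fastvg": 384}, i.e. each disk contributes
# its size plus 128 MiB of DRBD metadata to its own VG, while the diskless and
# file-based templates return an empty dict.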
7894 def _ComputeDiskSize(disk_template, disks):
7895 """Compute disk size requirements in the volume group
7898 # Required free disk space as a function of disk and swap space
7900 constants.DT_DISKLESS: None,
7901 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7902 # 128 MB are added for drbd metadata for each disk
7903 constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7904 constants.DT_FILE: None,
7905 constants.DT_SHARED_FILE: 0,
7906 constants.DT_BLOCK: 0,
7909 if disk_template not in req_size_dict:
7910 raise errors.ProgrammerError("Disk template '%s' size requirement"
7911 " is unknown" % disk_template)
7913 return req_size_dict[disk_template]
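# Editor's example for _ComputeDiskSize (sizes in MiB, chosen only for
# illustration): with disks = [{constants.IDISK_SIZE: 512},
# {constants.IDISK_SIZE: 2048}],
#   _ComputeDiskSize(constants.DT_PLAIN, disks) == 2560
#   _ComputeDiskSize(constants.DT_DRBD8, disks) == 2816   (+128 MiB of DRBD
#                                                           metadata per disk)
# whereas DT_DISKLESS and DT_FILE need no volume group space and return None.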
7916 def _FilterVmNodes(lu, nodenames):
7917 """Filters out non-vm_capable nodes from a list.
7919 @type lu: L{LogicalUnit}
7920 @param lu: the logical unit for which we check
7921 @type nodenames: list
7922 @param nodenames: the list of nodes on which we should check
7924 @return: the list of vm-capable nodes
7927 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7928 return [name for name in nodenames if name not in non_vm_nodes]
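# Editor's sketch of the filtering above (node names are hypothetical): if the
# configuration marks "node2" as the only non-vm_capable node, then
#   _FilterVmNodes(lu, ["node1", "node2", "node3"]) == ["node1", "node3"]
# The input order is preserved; the helper only drops members of the
# non-vm_capable set.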
7931 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7932 """Hypervisor parameter validation.
7934 This function abstracts the hypervisor parameter validation to be
7935 used in both instance create and instance modify.
7937 @type lu: L{LogicalUnit}
7938 @param lu: the logical unit for which we check
7939 @type nodenames: list
7940 @param nodenames: the list of nodes on which we should check
7941 @type hvname: string
7942 @param hvname: the name of the hypervisor we should use
7943 @type hvparams: dict
7944 @param hvparams: the parameters which we need to check
7945 @raise errors.OpPrereqError: if the parameters are not valid
7948 nodenames = _FilterVmNodes(lu, nodenames)
7949 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7952 for node in nodenames:
7956 info.Raise("Hypervisor parameter validation failed on node %s" % node)
7959 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7960 """OS parameters validation.
7962 @type lu: L{LogicalUnit}
7963 @param lu: the logical unit for which we check
7964 @type required: boolean
7965 @param required: whether the validation should fail if the OS is not found
7967 @type nodenames: list
7968 @param nodenames: the list of nodes on which we should check
7969 @type osname: string
7970 @param osname: the name of the OS we should use
7971 @type osparams: dict
7972 @param osparams: the parameters which we need to check
7973 @raise errors.OpPrereqError: if the parameters are not valid
7976 nodenames = _FilterVmNodes(lu, nodenames)
7977 result = lu.rpc.call_os_validate(required, nodenames, osname,
7978 [constants.OS_VALIDATE_PARAMETERS],
7980 for node, nres in result.items():
7981 # we don't check for offline cases since this should be run only
7982 # against the master node and/or an instance's nodes
7983 nres.Raise("OS Parameters validation failed on node %s" % node)
7984 if not nres.payload:
7985 lu.LogInfo("OS %s not found on node %s, validation skipped",
7989 class LUInstanceCreate(LogicalUnit):
7990 """Create an instance.
7993 HPATH = "instance-add"
7994 HTYPE = constants.HTYPE_INSTANCE
7997 def CheckArguments(self):
8001 # do not require name_check to ease forward/backward compatibility
8003 if self.op.no_install and self.op.start:
8004 self.LogInfo("No-installation mode selected, disabling startup")
8005 self.op.start = False
8006 # validate/normalize the instance name
8007 self.op.instance_name = \
8008 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8010 if self.op.ip_check and not self.op.name_check:
8011 # TODO: make the ip check more flexible and not depend on the name check
8012 raise errors.OpPrereqError("Cannot do IP address check without a name"
8013 " check", errors.ECODE_INVAL)
8015 # check nics' parameter names
8016 for nic in self.op.nics:
8017 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8019 # check disks. parameter names and consistent adopt/no-adopt strategy
8020 has_adopt = has_no_adopt = False
8021 for disk in self.op.disks:
8022 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8023 if constants.IDISK_ADOPT in disk:
8027 if has_adopt and has_no_adopt:
8028 raise errors.OpPrereqError("Either all disks are adopted or none is",
8031 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8032 raise errors.OpPrereqError("Disk adoption is not supported for the"
8033 " '%s' disk template" %
8034 self.op.disk_template,
8036 if self.op.iallocator is not None:
8037 raise errors.OpPrereqError("Disk adoption not allowed with an"
8038 " iallocator script", errors.ECODE_INVAL)
8039 if self.op.mode == constants.INSTANCE_IMPORT:
8040 raise errors.OpPrereqError("Disk adoption not allowed for"
8041 " instance import", errors.ECODE_INVAL)
8043 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8044 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8045 " but no 'adopt' parameter given" %
8046 self.op.disk_template,
8049 self.adopt_disks = has_adopt
8051 # instance name verification
8052 if self.op.name_check:
8053 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8054 self.op.instance_name = self.hostname1.name
8055 # used in CheckPrereq for ip ping check
8056 self.check_ip = self.hostname1.ip
8058 self.check_ip = None
8060 # file storage checks
8061 if (self.op.file_driver and
8062 not self.op.file_driver in constants.FILE_DRIVER):
8063 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8064 self.op.file_driver, errors.ECODE_INVAL)
8066 if self.op.disk_template == constants.DT_FILE:
8067 opcodes.RequireFileStorage()
8068 elif self.op.disk_template == constants.DT_SHARED_FILE:
8069 opcodes.RequireSharedFileStorage()
8071 ### Node/iallocator related checks
8072 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8074 if self.op.pnode is not None:
8075 if self.op.disk_template in constants.DTS_INT_MIRROR:
8076 if self.op.snode is None:
8077 raise errors.OpPrereqError("The networked disk templates need"
8078 " a mirror node", errors.ECODE_INVAL)
8080 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8082 self.op.snode = None
8084 self._cds = _GetClusterDomainSecret()
8086 if self.op.mode == constants.INSTANCE_IMPORT:
8087 # On import force_variant must be True, because if we forced it at
8088 # initial install, our only chance when importing it back is that it
8090 self.op.force_variant = True
8092 if self.op.no_install:
8093 self.LogInfo("No-installation mode has no effect during import")
8095 elif self.op.mode == constants.INSTANCE_CREATE:
8096 if self.op.os_type is None:
8097 raise errors.OpPrereqError("No guest OS specified",
8099 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8100 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8101 " installation" % self.op.os_type,
8103 if self.op.disk_template is None:
8104 raise errors.OpPrereqError("No disk template specified",
8107 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8108 # Check handshake to ensure both clusters have the same domain secret
8109 src_handshake = self.op.source_handshake
8110 if not src_handshake:
8111 raise errors.OpPrereqError("Missing source handshake",
8114 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8117 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8120 # Load and check source CA
8121 self.source_x509_ca_pem = self.op.source_x509_ca
8122 if not self.source_x509_ca_pem:
8123 raise errors.OpPrereqError("Missing source X509 CA",
8127 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8129 except OpenSSL.crypto.Error, err:
8130 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8131 (err, ), errors.ECODE_INVAL)
8133 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8134 if errcode is not None:
8135 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8138 self.source_x509_ca = cert
8140 src_instance_name = self.op.source_instance_name
8141 if not src_instance_name:
8142 raise errors.OpPrereqError("Missing source instance name",
8145 self.source_instance_name = \
8146 netutils.GetHostname(name=src_instance_name).name
8149 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8150 self.op.mode, errors.ECODE_INVAL)
8152 def ExpandNames(self):
8153 """ExpandNames for CreateInstance.
8155 Figure out the right locks for instance creation.
8158 self.needed_locks = {}
8160 instance_name = self.op.instance_name
8161 # this is just a preventive check, but someone might still add this
8162 # instance in the meantime, and creation will fail at lock-add time
8163 if instance_name in self.cfg.GetInstanceList():
8164 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8165 instance_name, errors.ECODE_EXISTS)
8167 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8169 if self.op.iallocator:
8170 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8172 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8173 nodelist = [self.op.pnode]
8174 if self.op.snode is not None:
8175 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8176 nodelist.append(self.op.snode)
8177 self.needed_locks[locking.LEVEL_NODE] = nodelist
8179 # in case of import lock the source node too
8180 if self.op.mode == constants.INSTANCE_IMPORT:
8181 src_node = self.op.src_node
8182 src_path = self.op.src_path
8184 if src_path is None:
8185 self.op.src_path = src_path = self.op.instance_name
8187 if src_node is None:
8188 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8189 self.op.src_node = None
8190 if os.path.isabs(src_path):
8191 raise errors.OpPrereqError("Importing an instance from an absolute"
8192 " path requires a source node option",
8195 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8196 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8197 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8198 if not os.path.isabs(src_path):
8199 self.op.src_path = src_path = \
8200 utils.PathJoin(constants.EXPORT_DIR, src_path)
8202 def _RunAllocator(self):
8203 """Run the allocator based on input opcode.
8206 nics = [n.ToDict() for n in self.nics]
8207 ial = IAllocator(self.cfg, self.rpc,
8208 mode=constants.IALLOCATOR_MODE_ALLOC,
8209 name=self.op.instance_name,
8210 disk_template=self.op.disk_template,
8213 vcpus=self.be_full[constants.BE_VCPUS],
8214 memory=self.be_full[constants.BE_MEMORY],
8217 hypervisor=self.op.hypervisor,
8220 ial.Run(self.op.iallocator)
8223 raise errors.OpPrereqError("Can't compute nodes using"
8224 " iallocator '%s': %s" %
8225 (self.op.iallocator, ial.info),
8227 if len(ial.result) != ial.required_nodes:
8228 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8229 " of nodes (%s), required %s" %
8230 (self.op.iallocator, len(ial.result),
8231 ial.required_nodes), errors.ECODE_FAULT)
8232 self.op.pnode = ial.result[0]
8233 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8234 self.op.instance_name, self.op.iallocator,
8235 utils.CommaJoin(ial.result))
8236 if ial.required_nodes == 2:
8237 self.op.snode = ial.result[1]
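# Editor's note (the node names are hypothetical): for a mirrored template
# such as DRBD8 the allocator must return two nodes, e.g.
# ["node2.example.com", "node5.example.com"]; the code above takes the first
# entry as the primary node and, when ial.required_nodes == 2, the second
# entry as the secondary node.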
8239 def BuildHooksEnv(self):
8242 This runs on master, primary and secondary nodes of the instance.
8246 "ADD_MODE": self.op.mode,
8248 if self.op.mode == constants.INSTANCE_IMPORT:
8249 env["SRC_NODE"] = self.op.src_node
8250 env["SRC_PATH"] = self.op.src_path
8251 env["SRC_IMAGES"] = self.src_images
8253 env.update(_BuildInstanceHookEnv(
8254 name=self.op.instance_name,
8255 primary_node=self.op.pnode,
8256 secondary_nodes=self.secondaries,
8257 status=self.op.start,
8258 os_type=self.op.os_type,
8259 memory=self.be_full[constants.BE_MEMORY],
8260 vcpus=self.be_full[constants.BE_VCPUS],
8261 nics=_NICListToTuple(self, self.nics),
8262 disk_template=self.op.disk_template,
8263 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8264 for d in self.disks],
8267 hypervisor_name=self.op.hypervisor,
8273 def BuildHooksNodes(self):
8274 """Build hooks nodes.
8277 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8280 def _ReadExportInfo(self):
8281 """Reads the export information from disk.
8283 It will override the opcode source node and path with the actual
8284 information, if these two were not specified before.
8286 @return: the export information
8289 assert self.op.mode == constants.INSTANCE_IMPORT
8291 src_node = self.op.src_node
8292 src_path = self.op.src_path
8294 if src_node is None:
8295 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8296 exp_list = self.rpc.call_export_list(locked_nodes)
8298 for node in exp_list:
8299 if exp_list[node].fail_msg:
8301 if src_path in exp_list[node].payload:
8303 self.op.src_node = src_node = node
8304 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8308 raise errors.OpPrereqError("No export found for relative path %s" %
8309 src_path, errors.ECODE_INVAL)
8311 _CheckNodeOnline(self, src_node)
8312 result = self.rpc.call_export_info(src_node, src_path)
8313 result.Raise("No export or invalid export found in dir %s" % src_path)
8315 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8316 if not export_info.has_section(constants.INISECT_EXP):
8317 raise errors.ProgrammerError("Corrupted export config",
8318 errors.ECODE_ENVIRON)
8320 ei_version = export_info.get(constants.INISECT_EXP, "version")
8321 if (int(ei_version) != constants.EXPORT_VERSION):
8322 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8323 (ei_version, constants.EXPORT_VERSION),
8324 errors.ECODE_ENVIRON)
8327 def _ReadExportParams(self, einfo):
8328 """Use export parameters as defaults.
8330 In case the opcode doesn't specify (as in override) some instance
8331 parameters, then try to use them from the export information, if the export declares them.
8335 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8337 if self.op.disk_template is None:
8338 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8339 self.op.disk_template = einfo.get(constants.INISECT_INS,
8342 raise errors.OpPrereqError("No disk template specified and the export"
8343 " is missing the disk_template information",
8346 if not self.op.disks:
8347 if einfo.has_option(constants.INISECT_INS, "disk_count"):
8349 # TODO: import the disk iv_name too
8350 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8351 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8352 disks.append({constants.IDISK_SIZE: disk_sz})
8353 self.op.disks = disks
8355 raise errors.OpPrereqError("No disk info specified and the export"
8356 " is missing the disk information",
8359 if (not self.op.nics and
8360 einfo.has_option(constants.INISECT_INS, "nic_count")):
8362 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8364 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8365 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8370 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8371 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8373 if (self.op.hypervisor is None and
8374 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8375 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8377 if einfo.has_section(constants.INISECT_HYP):
8378 # use the export parameters but do not override the ones
8379 # specified by the user
8380 for name, value in einfo.items(constants.INISECT_HYP):
8381 if name not in self.op.hvparams:
8382 self.op.hvparams[name] = value
8384 if einfo.has_section(constants.INISECT_BEP):
8385 # use the parameters, without overriding
8386 for name, value in einfo.items(constants.INISECT_BEP):
8387 if name not in self.op.beparams:
8388 self.op.beparams[name] = value
8390 # try to read the parameters old style, from the main section
8391 for name in constants.BES_PARAMETERS:
8392 if (name not in self.op.beparams and
8393 einfo.has_option(constants.INISECT_INS, name)):
8394 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8396 if einfo.has_section(constants.INISECT_OSP):
8397 # use the parameters, without overriding
8398 for name, value in einfo.items(constants.INISECT_OSP):
8399 if name not in self.op.osparams:
8400 self.op.osparams[name] = value
8402 def _RevertToDefaults(self, cluster):
8403 """Revert the instance parameters to the default values.
8407 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8408 for name in self.op.hvparams.keys():
8409 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8410 del self.op.hvparams[name]
8412 be_defs = cluster.SimpleFillBE({})
8413 for name in self.op.beparams.keys():
8414 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8415 del self.op.beparams[name]
8417 nic_defs = cluster.SimpleFillNIC({})
8418 for nic in self.op.nics:
8419 for name in constants.NICS_PARAMETERS:
8420 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8423 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8424 for name in self.op.osparams.keys():
8425 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8426 del self.op.osparams[name]
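# Editor's sketch of the revert above (the values are hypothetical): if the
# cluster-level default for constants.BE_MEMORY is 128 and the opcode carries
# beparams = {"memory": 128, "vcpus": 4}, only {"vcpus": 4} is kept, so the
# instance keeps following the cluster default for memory from then on; the
# same pruning is applied to hvparams, per-NIC parameters and osparams.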
8428 def _CalculateFileStorageDir(self):
8429 """Calculate final instance file storage dir.
8432 # file storage dir calculation/check
8433 self.instance_file_storage_dir = None
8434 if self.op.disk_template in constants.DTS_FILEBASED:
8435 # build the full file storage dir path
8438 if self.op.disk_template == constants.DT_SHARED_FILE:
8439 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8441 get_fsd_fn = self.cfg.GetFileStorageDir
8443 cfg_storagedir = get_fsd_fn()
8444 if not cfg_storagedir:
8445 raise errors.OpPrereqError("Cluster file storage dir not defined")
8446 joinargs.append(cfg_storagedir)
8448 if self.op.file_storage_dir is not None:
8449 joinargs.append(self.op.file_storage_dir)
8451 joinargs.append(self.op.instance_name)
8453 # pylint: disable-msg=W0142
8454 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
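# Editor's example (the paths are hypothetical): with a cluster file storage
# directory of "/srv/ganeti/file-storage", op.file_storage_dir = "web" and an
# instance named "inst1.example.com", the resulting directory is
#   /srv/ganeti/file-storage/web/inst1.example.com
# If op.file_storage_dir is not set, that middle component is simply omitted.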
8456 def CheckPrereq(self):
8457 """Check prerequisites.
8460 self._CalculateFileStorageDir()
8462 if self.op.mode == constants.INSTANCE_IMPORT:
8463 export_info = self._ReadExportInfo()
8464 self._ReadExportParams(export_info)
8466 if (not self.cfg.GetVGName() and
8467 self.op.disk_template not in constants.DTS_NOT_LVM):
8468 raise errors.OpPrereqError("Cluster does not support lvm-based"
8469 " instances", errors.ECODE_STATE)
8471 if self.op.hypervisor is None:
8472 self.op.hypervisor = self.cfg.GetHypervisorType()
8474 cluster = self.cfg.GetClusterInfo()
8475 enabled_hvs = cluster.enabled_hypervisors
8476 if self.op.hypervisor not in enabled_hvs:
8477 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8478 " cluster (%s)" % (self.op.hypervisor,
8479 ",".join(enabled_hvs)),
8482 # Check tag validity
8483 for tag in self.op.tags:
8484 objects.TaggableObject.ValidateTag(tag)
8486 # check hypervisor parameter syntax (locally)
8487 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8488 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8490 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8491 hv_type.CheckParameterSyntax(filled_hvp)
8492 self.hv_full = filled_hvp
8493 # check that we don't specify global parameters on an instance
8494 _CheckGlobalHvParams(self.op.hvparams)
8496 # fill and remember the beparams dict
8497 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8498 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8500 # build os parameters
8501 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8503 # now that hvp/bep are in final format, let's reset to defaults,
8505 if self.op.identify_defaults:
8506 self._RevertToDefaults(cluster)
8510 for idx, nic in enumerate(self.op.nics):
8511 nic_mode_req = nic.get(constants.INIC_MODE, None)
8512 nic_mode = nic_mode_req
8513 if nic_mode is None:
8514 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8516 # in routed mode, for the first nic, the default ip is 'auto'
8517 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8518 default_ip_mode = constants.VALUE_AUTO
8520 default_ip_mode = constants.VALUE_NONE
8522 # ip validity checks
8523 ip = nic.get(constants.INIC_IP, default_ip_mode)
8524 if ip is None or ip.lower() == constants.VALUE_NONE:
8526 elif ip.lower() == constants.VALUE_AUTO:
8527 if not self.op.name_check:
8528 raise errors.OpPrereqError("IP address set to auto but name checks"
8529 " have been skipped",
8531 nic_ip = self.hostname1.ip
8533 if not netutils.IPAddress.IsValid(ip):
8534 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8538 # TODO: check the ip address for uniqueness
8539 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8540 raise errors.OpPrereqError("Routed nic mode requires an ip address",
8543 # MAC address verification
8544 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8545 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8546 mac = utils.NormalizeAndValidateMac(mac)
8549 self.cfg.ReserveMAC(mac, self.proc.GetECId())
8550 except errors.ReservationError:
8551 raise errors.OpPrereqError("MAC address %s already in use"
8552 " in cluster" % mac,
8553 errors.ECODE_NOTUNIQUE)
8555 # Build nic parameters
8556 link = nic.get(constants.INIC_LINK, None)
8559 nicparams[constants.NIC_MODE] = nic_mode_req
8561 nicparams[constants.NIC_LINK] = link
8563 check_params = cluster.SimpleFillNIC(nicparams)
8564 objects.NIC.CheckParameterSyntax(check_params)
8565 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8567 # disk checks/pre-build
8568 default_vg = self.cfg.GetVGName()
8570 for disk in self.op.disks:
8571 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8572 if mode not in constants.DISK_ACCESS_SET:
8573 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8574 mode, errors.ECODE_INVAL)
8575 size = disk.get(constants.IDISK_SIZE, None)
8577 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8580 except (TypeError, ValueError):
8581 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8584 data_vg = disk.get(constants.IDISK_VG, default_vg)
8586 constants.IDISK_SIZE: size,
8587 constants.IDISK_MODE: mode,
8588 constants.IDISK_VG: data_vg,
8589 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8591 if constants.IDISK_ADOPT in disk:
8592 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8593 self.disks.append(new_disk)
8595 if self.op.mode == constants.INSTANCE_IMPORT:
8597 # Check that the new instance doesn't have less disks than the export
8598 instance_disks = len(self.disks)
8599 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8600 if instance_disks < export_disks:
8601 raise errors.OpPrereqError("Not enough disks to import."
8602 " (instance: %d, export: %d)" %
8603 (instance_disks, export_disks),
8607 for idx in range(export_disks):
8608 option = "disk%d_dump" % idx
8609 if export_info.has_option(constants.INISECT_INS, option):
8610 # FIXME: are the old os-es, disk sizes, etc. useful?
8611 export_name = export_info.get(constants.INISECT_INS, option)
8612 image = utils.PathJoin(self.op.src_path, export_name)
8613 disk_images.append(image)
8615 disk_images.append(False)
8617 self.src_images = disk_images
8619 old_name = export_info.get(constants.INISECT_INS, "name")
8621 exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8622 except (TypeError, ValueError), err:
8623 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8624 " an integer: %s" % str(err),
8626 if self.op.instance_name == old_name:
8627 for idx, nic in enumerate(self.nics):
8628 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8629 nic_mac_ini = "nic%d_mac" % idx
8630 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8632 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8634 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8635 if self.op.ip_check:
8636 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8637 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8638 (self.check_ip, self.op.instance_name),
8639 errors.ECODE_NOTUNIQUE)
8641 #### mac address generation
8642 # By generating here the mac address both the allocator and the hooks get
8643 # the real final mac address rather than the 'auto' or 'generate' value.
8644 # There is a race condition between the generation and the instance object
8645 # creation, which means that we know the mac is valid now, but we're not
8646 # sure it will be when we actually add the instance. If things go bad
8647 # adding the instance will abort because of a duplicate mac, and the
8648 # creation job will fail.
8649 for nic in self.nics:
8650 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8651 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8655 if self.op.iallocator is not None:
8656 self._RunAllocator()
8658 #### node related checks
8660 # check primary node
8661 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8662 assert self.pnode is not None, \
8663 "Cannot retrieve locked node %s" % self.op.pnode
8665 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8666 pnode.name, errors.ECODE_STATE)
8668 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8669 pnode.name, errors.ECODE_STATE)
8670 if not pnode.vm_capable:
8671 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8672 " '%s'" % pnode.name, errors.ECODE_STATE)
8674 self.secondaries = []
8676 # mirror node verification
8677 if self.op.disk_template in constants.DTS_INT_MIRROR:
8678 if self.op.snode == pnode.name:
8679 raise errors.OpPrereqError("The secondary node cannot be the"
8680 " primary node", errors.ECODE_INVAL)
8681 _CheckNodeOnline(self, self.op.snode)
8682 _CheckNodeNotDrained(self, self.op.snode)
8683 _CheckNodeVmCapable(self, self.op.snode)
8684 self.secondaries.append(self.op.snode)
8686 nodenames = [pnode.name] + self.secondaries
8688 if not self.adopt_disks:
8689 # Check lv size requirements, if not adopting
8690 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8691 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8693 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8694 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8695 disk[constants.IDISK_ADOPT])
8696 for disk in self.disks])
8697 if len(all_lvs) != len(self.disks):
8698 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8700 for lv_name in all_lvs:
8702 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
8703 # to ReserveLV use the same syntax
8704 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8705 except errors.ReservationError:
8706 raise errors.OpPrereqError("LV named %s used by another instance" %
8707 lv_name, errors.ECODE_NOTUNIQUE)
8709 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8710 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8712 node_lvs = self.rpc.call_lv_list([pnode.name],
8713 vg_names.payload.keys())[pnode.name]
8714 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8715 node_lvs = node_lvs.payload
8717 delta = all_lvs.difference(node_lvs.keys())
8719 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8720 utils.CommaJoin(delta),
8722 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8724 raise errors.OpPrereqError("Online logical volumes found, cannot"
8725 " adopt: %s" % utils.CommaJoin(online_lvs),
8727 # update the size of disk based on what is found
8728 for dsk in self.disks:
8729 dsk[constants.IDISK_SIZE] = \
8730 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8731 dsk[constants.IDISK_ADOPT])][0]))
8733 elif self.op.disk_template == constants.DT_BLOCK:
8734 # Normalize and de-duplicate device paths
8735 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8736 for disk in self.disks])
8737 if len(all_disks) != len(self.disks):
8738 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8740 baddisks = [d for d in all_disks
8741 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8743 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8744 " cannot be adopted" %
8745 (", ".join(baddisks),
8746 constants.ADOPTABLE_BLOCKDEV_ROOT),
8749 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8750 list(all_disks))[pnode.name]
8751 node_disks.Raise("Cannot get block device information from node %s" %
8753 node_disks = node_disks.payload
8754 delta = all_disks.difference(node_disks.keys())
8756 raise errors.OpPrereqError("Missing block device(s): %s" %
8757 utils.CommaJoin(delta),
8759 for dsk in self.disks:
8760 dsk[constants.IDISK_SIZE] = \
8761 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8763 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8765 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8766 # check OS parameters (remotely)
8767 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8769 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8771 # memory check on primary node
8773 _CheckNodeFreeMemory(self, self.pnode.name,
8774 "creating instance %s" % self.op.instance_name,
8775 self.be_full[constants.BE_MEMORY],
8778 self.dry_run_result = list(nodenames)
8780 def Exec(self, feedback_fn):
8781 """Create and add the instance to the cluster.
8784 instance = self.op.instance_name
8785 pnode_name = self.pnode.name
8787 ht_kind = self.op.hypervisor
8788 if ht_kind in constants.HTS_REQ_PORT:
8789 network_port = self.cfg.AllocatePort()
8793 disks = _GenerateDiskTemplate(self,
8794 self.op.disk_template,
8795 instance, pnode_name,
8798 self.instance_file_storage_dir,
8799 self.op.file_driver,
8803 iobj = objects.Instance(name=instance, os=self.op.os_type,
8804 primary_node=pnode_name,
8805 nics=self.nics, disks=disks,
8806 disk_template=self.op.disk_template,
8808 network_port=network_port,
8809 beparams=self.op.beparams,
8810 hvparams=self.op.hvparams,
8811 hypervisor=self.op.hypervisor,
8812 osparams=self.op.osparams,
8816 for tag in self.op.tags:
8819 if self.adopt_disks:
8820 if self.op.disk_template == constants.DT_PLAIN:
8821 # rename LVs to the newly-generated names; we need to construct
8822 # 'fake' LV disks with the old data, plus the new unique_id
8823 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8825 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8826 rename_to.append(t_dsk.logical_id)
8827 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8828 self.cfg.SetDiskID(t_dsk, pnode_name)
8829 result = self.rpc.call_blockdev_rename(pnode_name,
8830 zip(tmp_disks, rename_to))
8831 result.Raise("Failed to rename adoped LVs")
8833 feedback_fn("* creating instance disks...")
8835 _CreateDisks(self, iobj)
8836 except errors.OpExecError:
8837 self.LogWarning("Device creation failed, reverting...")
8839 _RemoveDisks(self, iobj)
8841 self.cfg.ReleaseDRBDMinors(instance)
8844 feedback_fn("adding instance %s to cluster config" % instance)
8846 self.cfg.AddInstance(iobj, self.proc.GetECId())
8848 # Declare that we don't want to remove the instance lock anymore, as we've
8849 # added the instance to the config
8850 del self.remove_locks[locking.LEVEL_INSTANCE]
8852 if self.op.mode == constants.INSTANCE_IMPORT:
8853 # Release unused nodes
8854 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8857 _ReleaseLocks(self, locking.LEVEL_NODE)
8860 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8861 feedback_fn("* wiping instance disks...")
8863 _WipeDisks(self, iobj)
8864 except errors.OpExecError, err:
8865 logging.exception("Wiping disks failed")
8866 self.LogWarning("Wiping instance disks failed (%s)", err)
8870 # Something is already wrong with the disks, don't do anything else
8872 elif self.op.wait_for_sync:
8873 disk_abort = not _WaitForSync(self, iobj)
8874 elif iobj.disk_template in constants.DTS_INT_MIRROR:
8875 # make sure the disks are not degraded (still sync-ing is ok)
8876 feedback_fn("* checking mirrors status")
8877 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8882 _RemoveDisks(self, iobj)
8883 self.cfg.RemoveInstance(iobj.name)
8884 # Make sure the instance lock gets removed
8885 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8886 raise errors.OpExecError("There are some degraded disks for"
8889 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8890 if self.op.mode == constants.INSTANCE_CREATE:
8891 if not self.op.no_install:
8892 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
8893 not self.op.wait_for_sync)
8895 feedback_fn("* pausing disk sync to install instance OS")
8896 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
8898 for idx, success in enumerate(result.payload):
8900 logging.warn("pause-sync of instance %s for disk %d failed",
8903 feedback_fn("* running the instance OS create scripts...")
8904 # FIXME: pass debug option from opcode to backend
8905 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8906 self.op.debug_level)
8908 feedback_fn("* resuming disk sync")
8909 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
8911 for idx, success in enumerate(result.payload):
8913 logging.warn("resume-sync of instance %s for disk %d failed",
8916 result.Raise("Could not add os for instance %s"
8917 " on node %s" % (instance, pnode_name))
8919 elif self.op.mode == constants.INSTANCE_IMPORT:
8920 feedback_fn("* running the instance OS import scripts...")
8924 for idx, image in enumerate(self.src_images):
8928 # FIXME: pass debug option from opcode to backend
8929 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8930 constants.IEIO_FILE, (image, ),
8931 constants.IEIO_SCRIPT,
8932 (iobj.disks[idx], idx),
8934 transfers.append(dt)
8937 masterd.instance.TransferInstanceData(self, feedback_fn,
8938 self.op.src_node, pnode_name,
8939 self.pnode.secondary_ip,
8941 if not compat.all(import_result):
8942 self.LogWarning("Some disks for instance %s on node %s were not"
8943 " imported successfully" % (instance, pnode_name))
8945 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8946 feedback_fn("* preparing remote import...")
8947 # The source cluster will stop the instance before attempting to make a
8948 # connection. In some cases stopping an instance can take a long time,
8949 # hence the shutdown timeout is added to the connection timeout.
8950 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8951 self.op.source_shutdown_timeout)
8952 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8954 assert iobj.primary_node == self.pnode.name
8956 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8957 self.source_x509_ca,
8958 self._cds, timeouts)
8959 if not compat.all(disk_results):
8960 # TODO: Should the instance still be started, even if some disks
8961 # failed to import (valid for local imports, too)?
8962 self.LogWarning("Some disks for instance %s on node %s were not"
8963 " imported successfully" % (instance, pnode_name))
8965 # Run rename script on newly imported instance
8966 assert iobj.name == instance
8967 feedback_fn("Running rename script for %s" % instance)
8968 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8969 self.source_instance_name,
8970 self.op.debug_level)
8972 self.LogWarning("Failed to run rename script for %s on node"
8973 " %s: %s" % (instance, pnode_name, result.fail_msg))
8976 # also checked in the prereq part
8977 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8981 iobj.admin_up = True
8982 self.cfg.Update(iobj, feedback_fn)
8983 logging.info("Starting instance %s on node %s", instance, pnode_name)
8984 feedback_fn("* starting instance...")
8985 result = self.rpc.call_instance_start(pnode_name, iobj,
8987 result.Raise("Could not start instance")
8989 return list(iobj.all_nodes)
8992 class LUInstanceConsole(NoHooksLU):
8993 """Connect to an instance's console.
8995 This is somewhat special in that it returns the command line that
8996 you need to run on the master node in order to connect to the console.
9002 def ExpandNames(self):
9003 self._ExpandAndLockInstance()
9005 def CheckPrereq(self):
9006 """Check prerequisites.
9008 This checks that the instance is in the cluster.
9011 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9012 assert self.instance is not None, \
9013 "Cannot retrieve locked instance %s" % self.op.instance_name
9014 _CheckNodeOnline(self, self.instance.primary_node)
9016 def Exec(self, feedback_fn):
9017 """Connect to the console of an instance
9020 instance = self.instance
9021 node = instance.primary_node
9023 node_insts = self.rpc.call_instance_list([node],
9024 [instance.hypervisor])[node]
9025 node_insts.Raise("Can't get node information from %s" % node)
9027 if instance.name not in node_insts.payload:
9028 if instance.admin_up:
9029 state = constants.INSTST_ERRORDOWN
9031 state = constants.INSTST_ADMINDOWN
9032 raise errors.OpExecError("Instance %s is not running (state %s)" %
9033 (instance.name, state))
9035 logging.debug("Connecting to console of %s on %s", instance.name, node)
9037 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9040 def _GetInstanceConsole(cluster, instance):
9041 """Returns console information for an instance.
9043 @type cluster: L{objects.Cluster}
9044 @type instance: L{objects.Instance}
9048 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9049 # beparams and hvparams are passed separately, to avoid editing the
9050 # instance and then saving the defaults in the instance itself.
9051 hvparams = cluster.FillHV(instance)
9052 beparams = cluster.FillBE(instance)
9053 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9055 assert console.instance == instance.name
9056 assert console.Validate()
9058 return console.ToDict()
9061 class LUInstanceReplaceDisks(LogicalUnit):
9062 """Replace the disks of an instance.
9065 HPATH = "mirrors-replace"
9066 HTYPE = constants.HTYPE_INSTANCE
9069 def CheckArguments(self):
9070 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9073 def ExpandNames(self):
9074 self._ExpandAndLockInstance()
9076 assert locking.LEVEL_NODE not in self.needed_locks
9077 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9079 assert self.op.iallocator is None or self.op.remote_node is None, \
9080 "Conflicting options"
9082 if self.op.remote_node is not None:
9083 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9085 # Warning: do not remove the locking of the new secondary here
9086 # unless DRBD8.AddChildren is changed to work in parallel;
9087 # currently it doesn't since parallel invocations of
9088 # FindUnusedMinor will conflict
9089 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9090 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9092 self.needed_locks[locking.LEVEL_NODE] = []
9093 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9095 if self.op.iallocator is not None:
9096 # iallocator will select a new node in the same group
9097 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9099 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9100 self.op.iallocator, self.op.remote_node,
9101 self.op.disks, False, self.op.early_release)
9103 self.tasklets = [self.replacer]
9105 def DeclareLocks(self, level):
9106 if level == locking.LEVEL_NODEGROUP:
9107 assert self.op.remote_node is None
9108 assert self.op.iallocator is not None
9109 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9111 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9112 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9113 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9115 elif level == locking.LEVEL_NODE:
9116 if self.op.iallocator is not None:
9117 assert self.op.remote_node is None
9118 assert not self.needed_locks[locking.LEVEL_NODE]
9120 # Lock member nodes of all locked groups
9121 self.needed_locks[locking.LEVEL_NODE] = [node_name
9122 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9123 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9125 self._LockInstancesNodes()
9127 def BuildHooksEnv(self):
9130 This runs on the master, the primary and all the secondaries.
9133 instance = self.replacer.instance
9135 "MODE": self.op.mode,
9136 "NEW_SECONDARY": self.op.remote_node,
9137 "OLD_SECONDARY": instance.secondary_nodes[0],
9139 env.update(_BuildInstanceHookEnvByObject(self, instance))
9142 def BuildHooksNodes(self):
9143 """Build hooks nodes.
9146 instance = self.replacer.instance
9148 self.cfg.GetMasterNode(),
9149 instance.primary_node,
9151 if self.op.remote_node is not None:
9152 nl.append(self.op.remote_node)
9155 def CheckPrereq(self):
9156 """Check prerequisites.
9159 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9160 self.op.iallocator is None)
9162 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9164 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9166 return LogicalUnit.CheckPrereq(self)
9169 class TLReplaceDisks(Tasklet):
9170 """Replaces disks for an instance.
9172 Note: Locking is not within the scope of this class.
9175 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9176 disks, delay_iallocator, early_release):
9177 """Initializes this class.
9180 Tasklet.__init__(self, lu)
9183 self.instance_name = instance_name
9185 self.iallocator_name = iallocator_name
9186 self.remote_node = remote_node
9188 self.delay_iallocator = delay_iallocator
9189 self.early_release = early_release
9192 self.instance = None
9193 self.new_node = None
9194 self.target_node = None
9195 self.other_node = None
9196 self.remote_node_info = None
9197 self.node_secondary_ip = None
9200 def CheckArguments(mode, remote_node, iallocator):
9201 """Helper function for users of this class.
9204 # check for valid parameter combination
9205 if mode == constants.REPLACE_DISK_CHG:
9206 if remote_node is None and iallocator is None:
9207 raise errors.OpPrereqError("When changing the secondary either an"
9208 " iallocator script must be used or the"
9209 " new node given", errors.ECODE_INVAL)
9211 if remote_node is not None and iallocator is not None:
9212 raise errors.OpPrereqError("Give either the iallocator or the new"
9213 " secondary, not both", errors.ECODE_INVAL)
9215 elif remote_node is not None or iallocator is not None:
9216 # Not replacing the secondary
9217 raise errors.OpPrereqError("The iallocator and new node options can"
9218 " only be used when changing the"
9219 " secondary node", errors.ECODE_INVAL)
9222 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9223 """Compute a new secondary node using an IAllocator.
9226 ial = IAllocator(lu.cfg, lu.rpc,
9227 mode=constants.IALLOCATOR_MODE_RELOC,
9229 relocate_from=list(relocate_from))
9231 ial.Run(iallocator_name)
9234 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9235 " %s" % (iallocator_name, ial.info),
9238 if len(ial.result) != ial.required_nodes:
9239 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9240 " of nodes (%s), required %s" %
9242 len(ial.result), ial.required_nodes),
9245 remote_node_name = ial.result[0]
9247 lu.LogInfo("Selected new secondary for instance '%s': %s",
9248 instance_name, remote_node_name)
9250 return remote_node_name
9252 def _FindFaultyDisks(self, node_name):
9253 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9256 def _CheckDisksActivated(self, instance):
9257 """Checks if the instance disks are activated.
9259 @param instance: The instance to check disks
9260 @return: True if they are activated, False otherwise
9263 nodes = instance.all_nodes
9265 for idx, dev in enumerate(instance.disks):
9267 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9268 self.cfg.SetDiskID(dev, node)
9270 result = self.rpc.call_blockdev_find(node, dev)
9274 elif result.fail_msg or not result.payload:
9279 def CheckPrereq(self):
9280 """Check prerequisites.
9282 This checks that the instance is in the cluster.
9285 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9286 assert instance is not None, \
9287 "Cannot retrieve locked instance %s" % self.instance_name
9289 if instance.disk_template != constants.DT_DRBD8:
9290 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9291 " instances", errors.ECODE_INVAL)
9293 if len(instance.secondary_nodes) != 1:
9294 raise errors.OpPrereqError("The instance has a strange layout,"
9295 " expected one secondary but found %d" %
9296 len(instance.secondary_nodes),
9299 if not self.delay_iallocator:
9300 self._CheckPrereq2()
9302 def _CheckPrereq2(self):
9303 """Check prerequisites, second part.
9305 This function should always be part of CheckPrereq. It was separated and is
9306 now called from Exec because during node evacuation iallocator was only
9307 called with an unmodified cluster model, not taking planned changes into account.
9311 instance = self.instance
9312 secondary_node = instance.secondary_nodes[0]
9314 if self.iallocator_name is None:
9315 remote_node = self.remote_node
9317 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9318 instance.name, instance.secondary_nodes)
9320 if remote_node is None:
9321 self.remote_node_info = None
9323 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9324 "Remote node '%s' is not locked" % remote_node
9326 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9327 assert self.remote_node_info is not None, \
9328 "Cannot retrieve locked node %s" % remote_node
9330 if remote_node == self.instance.primary_node:
9331 raise errors.OpPrereqError("The specified node is the primary node of"
9332 " the instance", errors.ECODE_INVAL)
9334 if remote_node == secondary_node:
9335 raise errors.OpPrereqError("The specified node is already the"
9336 " secondary node of the instance",
9339 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9340 constants.REPLACE_DISK_CHG):
9341 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9344 if self.mode == constants.REPLACE_DISK_AUTO:
9345 if not self._CheckDisksActivated(instance):
9346 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9347 " first" % self.instance_name,
9349 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9350 faulty_secondary = self._FindFaultyDisks(secondary_node)
9352 if faulty_primary and faulty_secondary:
9353 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9354 " one node and can not be repaired"
9355 " automatically" % self.instance_name,
9359 self.disks = faulty_primary
9360 self.target_node = instance.primary_node
9361 self.other_node = secondary_node
9362 check_nodes = [self.target_node, self.other_node]
9363 elif faulty_secondary:
9364 self.disks = faulty_secondary
9365 self.target_node = secondary_node
9366 self.other_node = instance.primary_node
9367 check_nodes = [self.target_node, self.other_node]
9368 else:
9369 self.disks = []
9370 check_nodes = []
9372 else:
9373 # Non-automatic modes
9374 if self.mode == constants.REPLACE_DISK_PRI:
9375 self.target_node = instance.primary_node
9376 self.other_node = secondary_node
9377 check_nodes = [self.target_node, self.other_node]
9379 elif self.mode == constants.REPLACE_DISK_SEC:
9380 self.target_node = secondary_node
9381 self.other_node = instance.primary_node
9382 check_nodes = [self.target_node, self.other_node]
9384 elif self.mode == constants.REPLACE_DISK_CHG:
9385 self.new_node = remote_node
9386 self.other_node = instance.primary_node
9387 self.target_node = secondary_node
9388 check_nodes = [self.new_node, self.other_node]
9390 _CheckNodeNotDrained(self.lu, remote_node)
9391 _CheckNodeVmCapable(self.lu, remote_node)
9393 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9394 assert old_node_info is not None
9395 if old_node_info.offline and not self.early_release:
9396 # doesn't make sense to delay the release
9397 self.early_release = True
9398 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9399 " early-release mode", secondary_node)
9401 else:
9402 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9403 self.mode)
9405 # If not specified, all disks should be replaced
9406 if not self.disks:
9407 self.disks = range(len(self.instance.disks))
9409 for node in check_nodes:
9410 _CheckNodeOnline(self.lu, node)
9412 touched_nodes = frozenset(node_name for node_name in [self.new_node,
9415 if node_name is not None)
9417 # Release unneeded node locks
9418 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9420 # Release any owned node group
9421 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9422 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9424 # Check whether disks are valid
9425 for disk_idx in self.disks:
9426 instance.FindDisk(disk_idx)
9428 # Get secondary node IP addresses
9429 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9430 in self.cfg.GetMultiNodeInfo(touched_nodes))
9432 def Exec(self, feedback_fn):
9433 """Execute disk replacement.
9435 This dispatches the disk replacement to the appropriate handler.
9438 if self.delay_iallocator:
9439 self._CheckPrereq2()
9442 # Verify owned locks before starting operation
9443 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9444 assert set(owned_nodes) == set(self.node_secondary_ip), \
9445 ("Incorrect node locks, owning %s, expected %s" %
9446 (owned_nodes, self.node_secondary_ip.keys()))
9448 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9449 assert list(owned_instances) == [self.instance_name], \
9450 "Instance '%s' not locked" % self.instance_name
9452 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9453 "Should not own any node group lock at this point"
9456 feedback_fn("No disks need replacement")
9459 feedback_fn("Replacing disk(s) %s for %s" %
9460 (utils.CommaJoin(self.disks), self.instance.name))
9462 activate_disks = (not self.instance.admin_up)
9464 # Activate the instance disks if we're replacing them on a down instance
9465 if activate_disks:
9466 _StartInstanceDisks(self.lu, self.instance, True)
9468 try:
9469 # Should we replace the secondary node?
9470 if self.new_node is not None:
9471 fn = self._ExecDrbd8Secondary
9472 else:
9473 fn = self._ExecDrbd8DiskOnly
9475 result = fn(feedback_fn)
9476 finally:
9477 # Deactivate the instance disks if we're replacing them on a
9478 # down instance
9479 if activate_disks:
9480 _SafeShutdownInstanceDisks(self.lu, self.instance)
9483 # Verify owned locks
9484 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9485 nodes = frozenset(self.node_secondary_ip)
9486 assert ((self.early_release and not owned_nodes) or
9487 (not self.early_release and not (set(owned_nodes) - nodes))), \
9488 ("Not owning the correct locks, early_release=%s, owned=%r,"
9489 " nodes=%r" % (self.early_release, owned_nodes, nodes))
9493 def _CheckVolumeGroup(self, nodes):
9494 self.lu.LogInfo("Checking volume groups")
9496 vgname = self.cfg.GetVGName()
9498 # Make sure volume group exists on all involved nodes
9499 results = self.rpc.call_vg_list(nodes)
9500 if not results:
9501 raise errors.OpExecError("Can't list volume groups on the nodes")
9503 for node in nodes:
9504 res = results[node]
9505 res.Raise("Error checking node %s" % node)
9506 if vgname not in res.payload:
9507 raise errors.OpExecError("Volume group '%s' not found on node %s" %
9510 def _CheckDisksExistence(self, nodes):
9511 # Check disk existence
9512 for idx, dev in enumerate(self.instance.disks):
9513 if idx not in self.disks:
9514 continue
9516 for node in nodes:
9517 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9518 self.cfg.SetDiskID(dev, node)
9520 result = self.rpc.call_blockdev_find(node, dev)
9522 msg = result.fail_msg
9523 if msg or not result.payload:
9524 if not msg:
9525 msg = "disk not found"
9526 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9527 (idx, node, msg))
9529 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9530 for idx, dev in enumerate(self.instance.disks):
9531 if idx not in self.disks:
9534 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9537 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9539 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9540 " replace disks for instance %s" %
9541 (node_name, self.instance.name))
9543 def _CreateNewStorage(self, node_name):
9544 """Create new storage on the primary or secondary node.
9546 This is only used for same-node replaces, not for changing the
9547 secondary node, hence we don't want to modify the existing disk.
9550 iv_names = {}
9552 for idx, dev in enumerate(self.instance.disks):
9553 if idx not in self.disks:
9554 continue
9556 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9558 self.cfg.SetDiskID(dev, node_name)
9560 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9561 names = _GenerateUniqueNames(self.lu, lv_names)
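# Note: _GenerateUniqueNames prefixes each suffix above with a cluster-unique ID,
# so the resulting LV names typically look like "<unique-id>.disk0_data" and
# "<unique-id>.disk0_meta" (sketch, exact prefix depends on the config generator).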
9563 vg_data = dev.children[0].logical_id[0]
9564 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9565 logical_id=(vg_data, names[0]))
9566 vg_meta = dev.children[1].logical_id[0]
9567 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9568 logical_id=(vg_meta, names[1]))
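# The metadata LV is created with a fixed 128 MiB size, the same size this module
# uses when generating DRBD8 disks elsewhere.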
9570 new_lvs = [lv_data, lv_meta]
9571 old_lvs = [child.Copy() for child in dev.children]
9572 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
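# iv_names maps the instance-visible disk name (e.g. "disk/0") to a tuple of
# (DRBD device, old backing LVs, new backing LVs); the detach/rename/attach loop
# and the final old-storage cleanup below both iterate over it.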
9574 # we pass force_create=True to force the LVM creation
9575 for new_lv in new_lvs:
9576 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9577 _GetInstanceInfoText(self.instance), False)
9579 return iv_names
9581 def _CheckDevices(self, node_name, iv_names):
9582 for name, (dev, _, _) in iv_names.iteritems():
9583 self.cfg.SetDiskID(dev, node_name)
9585 result = self.rpc.call_blockdev_find(node_name, dev)
9587 msg = result.fail_msg
9588 if msg or not result.payload:
9590 msg = "disk not found"
9591 raise errors.OpExecError("Can't find DRBD device %s: %s" %
9594 if result.payload.is_degraded:
9595 raise errors.OpExecError("DRBD device %s is degraded!" % name)
9597 def _RemoveOldStorage(self, node_name, iv_names):
9598 for name, (_, old_lvs, _) in iv_names.iteritems():
9599 self.lu.LogInfo("Remove logical volumes for %s" % name)
9602 self.cfg.SetDiskID(lv, node_name)
9604 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9606 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9607 hint="remove unused LVs manually")
9609 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable-msg=W0613
9610 """Replace a disk on the primary or secondary for DRBD 8.
9612 The algorithm for replace is quite complicated:
9614 1. for each disk to be replaced:
9616 1. create new LVs on the target node with unique names
9617 1. detach old LVs from the drbd device
9618 1. rename old LVs to name_replaced.<time_t>
9619 1. rename new LVs to old LVs
9620 1. attach the new LVs (with the old names now) to the drbd device
9622 1. wait for sync across all devices
9624 1. for each modified disk:
9626 1. remove old LVs (which have the name name_replaced.<time_t>)
9628 Failures are not very well handled.
9633 # Step: check device activation
9634 self.lu.LogStep(1, steps_total, "Check device existence")
9635 self._CheckDisksExistence([self.other_node, self.target_node])
9636 self._CheckVolumeGroup([self.target_node, self.other_node])
9638 # Step: check other node consistency
9639 self.lu.LogStep(2, steps_total, "Check peer consistency")
9640 self._CheckDisksConsistency(self.other_node,
9641 self.other_node == self.instance.primary_node,
9644 # Step: create new storage
9645 self.lu.LogStep(3, steps_total, "Allocate new storage")
9646 iv_names = self._CreateNewStorage(self.target_node)
9648 # Step: for each lv, detach+rename*2+attach
9649 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9650 for dev, old_lvs, new_lvs in iv_names.itervalues():
9651 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9653 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9655 result.Raise("Can't detach drbd from local storage on node"
9656 " %s for device %s" % (self.target_node, dev.iv_name))
9658 #cfg.Update(instance)
9660 # ok, we created the new LVs, so now we know we have the needed
9661 # storage; as such, we proceed on the target node to rename
9662 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9663 # using the assumption that logical_id == physical_id (which in
9664 # turn is the unique_id on that node)
9666 # FIXME(iustin): use a better name for the replaced LVs
9667 temp_suffix = int(time.time())
9668 ren_fn = lambda d, suff: (d.physical_id[0],
9669 d.physical_id[1] + "_replaced-%s" % suff)
9671 # Build the rename list based on what LVs exist on the node
9672 rename_old_to_new = []
9673 for to_ren in old_lvs:
9674 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9675 if not result.fail_msg and result.payload:
9677 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9679 self.lu.LogInfo("Renaming the old LVs on the target node")
9680 result = self.rpc.call_blockdev_rename(self.target_node,
9682 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9684 # Now we rename the new LVs to the old LVs
9685 self.lu.LogInfo("Renaming the new LVs on the target node")
9686 rename_new_to_old = [(new, old.physical_id)
9687 for old, new in zip(old_lvs, new_lvs)]
9688 result = self.rpc.call_blockdev_rename(self.target_node,
9690 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9692 # Intermediate steps of in memory modifications
9693 for old, new in zip(old_lvs, new_lvs):
9694 new.logical_id = old.logical_id
9695 self.cfg.SetDiskID(new, self.target_node)
9697 # We need to modify old_lvs so that removal later removes the
9698 # right LVs, not the newly added ones; note that old_lvs is a copy here
9700 for disk in old_lvs:
9701 disk.logical_id = ren_fn(disk, temp_suffix)
9702 self.cfg.SetDiskID(disk, self.target_node)
9704 # Now that the new lvs have the old name, we can add them to the device
9705 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9706 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9708 msg = result.fail_msg
9710 for new_lv in new_lvs:
9711 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9714 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9715 hint=("cleanup manually the unused logical"
9717 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9719 cstep = 5
9720 if self.early_release:
9721 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9722 cstep += 1
9723 self._RemoveOldStorage(self.target_node, iv_names)
9724 # WARNING: we release both node locks here, do not do other RPCs
9725 # than WaitForSync to the primary node
9726 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9727 names=[self.target_node, self.other_node])
9730 # This can fail as the old devices are degraded and _WaitForSync
9731 # does a combined result over all disks, so we don't check its return value
9732 self.lu.LogStep(cstep, steps_total, "Sync devices")
9734 _WaitForSync(self.lu, self.instance)
9736 # Check all devices manually
9737 self._CheckDevices(self.instance.primary_node, iv_names)
9739 # Step: remove old storage
9740 if not self.early_release:
9741 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9743 self._RemoveOldStorage(self.target_node, iv_names)
9745 def _ExecDrbd8Secondary(self, feedback_fn):
9746 """Replace the secondary node for DRBD 8.
9748 The algorithm for replace is quite complicated:
9749 - for all disks of the instance:
9750 - create new LVs on the new node with same names
9751 - shutdown the drbd device on the old secondary
9752 - disconnect the drbd network on the primary
9753 - create the drbd device on the new secondary
9754 - network attach the drbd on the primary, using an artifice:
9755 the drbd code for Attach() will connect to the network if it
9756 finds a device which is connected to the good local disks but not network enabled
9758 - wait for sync across all devices
9759 - remove all disks from the old secondary
9761 Failures are not very well handled.
9766 # Step: check device activation
9767 self.lu.LogStep(1, steps_total, "Check device existence")
9768 self._CheckDisksExistence([self.instance.primary_node])
9769 self._CheckVolumeGroup([self.instance.primary_node])
9771 # Step: check other node consistency
9772 self.lu.LogStep(2, steps_total, "Check peer consistency")
9773 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9775 # Step: create new storage
9776 self.lu.LogStep(3, steps_total, "Allocate new storage")
9777 for idx, dev in enumerate(self.instance.disks):
9778 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9779 (self.new_node, idx))
9780 # we pass force_create=True to force LVM creation
9781 for new_lv in dev.children:
9782 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9783 _GetInstanceInfoText(self.instance), False)
9785 # Step 4: drbd minors and drbd setup changes
9786 # after this, we must manually remove the drbd minors on both the
9787 # error and the success paths
9788 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9789 minors = self.cfg.AllocateDRBDMinor([self.new_node
9790 for dev in self.instance.disks],
9792 logging.debug("Allocated minors %r", minors)
9795 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9796 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9797 (self.new_node, idx))
9798 # create new devices on new_node; note that we create two IDs:
9799 # one without port, so the drbd will be activated without
9800 # networking information on the new node at this stage, and one
9801 # with network, for the latter activation in step 4
9802 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9803 if self.instance.primary_node == o_node1:
9806 assert self.instance.primary_node == o_node2, "Three-node instance?"
9809 new_alone_id = (self.instance.primary_node, self.new_node, None,
9810 p_minor, new_minor, o_secret)
9811 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9812 p_minor, new_minor, o_secret)
9814 iv_names[idx] = (dev, dev.children, new_net_id)
9815 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9817 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9818 logical_id=new_alone_id,
9819 children=dev.children,
9820 size=dev.size)
9821 try:
9822 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9823 _GetInstanceInfoText(self.instance), False)
9824 except errors.GenericError:
9825 self.cfg.ReleaseDRBDMinors(self.instance.name)
9828 # We have new devices, shutdown the drbd on the old secondary
9829 for idx, dev in enumerate(self.instance.disks):
9830 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9831 self.cfg.SetDiskID(dev, self.target_node)
9832 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9834 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9835 "node: %s" % (idx, msg),
9836 hint=("Please cleanup this device manually as"
9837 " soon as possible"))
9839 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9840 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9841 self.node_secondary_ip,
9842 self.instance.disks)\
9843 [self.instance.primary_node]
9845 msg = result.fail_msg
9847 # detaches didn't succeed (unlikely)
9848 self.cfg.ReleaseDRBDMinors(self.instance.name)
9849 raise errors.OpExecError("Can't detach the disks from the network on"
9850 " old node: %s" % (msg,))
9852 # if we managed to detach at least one, we update all the disks of
9853 # the instance to point to the new secondary
9854 self.lu.LogInfo("Updating instance configuration")
9855 for dev, _, new_logical_id in iv_names.itervalues():
9856 dev.logical_id = new_logical_id
9857 self.cfg.SetDiskID(dev, self.instance.primary_node)
9859 self.cfg.Update(self.instance, feedback_fn)
9861 # and now perform the drbd attach
9862 self.lu.LogInfo("Attaching primary drbds to new secondary"
9863 " (standalone => connected)")
9864 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9865 self.new_node],
9866 self.node_secondary_ip,
9867 self.instance.disks,
9868 self.instance.name,
9869 False)
9870 for to_node, to_result in result.items():
9871 msg = to_result.fail_msg
9873 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9875 hint=("please do a gnt-instance info to see the"
9876 " status of disks"))
9878 if self.early_release:
9879 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9881 self._RemoveOldStorage(self.target_node, iv_names)
9882 # WARNING: we release all node locks here, do not do other RPCs
9883 # than WaitForSync to the primary node
9884 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9885 names=[self.instance.primary_node,
9890 # This can fail as the old devices are degraded and _WaitForSync
9891 # does a combined result over all disks, so we don't check its return value
9892 self.lu.LogStep(cstep, steps_total, "Sync devices")
9894 _WaitForSync(self.lu, self.instance)
9896 # Check all devices manually
9897 self._CheckDevices(self.instance.primary_node, iv_names)
9899 # Step: remove old storage
9900 if not self.early_release:
9901 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9902 self._RemoveOldStorage(self.target_node, iv_names)
9905 class LURepairNodeStorage(NoHooksLU):
9906 """Repairs the volume group on a node.
9911 def CheckArguments(self):
9912 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9914 storage_type = self.op.storage_type
9916 if (constants.SO_FIX_CONSISTENCY not in
9917 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9918 raise errors.OpPrereqError("Storage units of type '%s' can not be"
9919 " repaired" % storage_type,
9922 def ExpandNames(self):
9923 self.needed_locks = {
9924 locking.LEVEL_NODE: [self.op.node_name],
9927 def _CheckFaultyDisks(self, instance, node_name):
9928 """Ensure faulty disks abort the opcode or at least warn."""
9929 try:
9930 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9931 node_name, True):
9932 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9933 " node '%s'" % (instance.name, node_name),
9934 errors.ECODE_STATE)
9935 except errors.OpPrereqError, err:
9936 if self.op.ignore_consistency:
9937 self.proc.LogWarning(str(err.args[0]))
9938 else:
9939 raise
9941 def CheckPrereq(self):
9942 """Check prerequisites.
9945 # Check whether any instance on this node has faulty disks
9946 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9947 if not inst.admin_up:
9948 continue
9949 check_nodes = set(inst.all_nodes)
9950 check_nodes.discard(self.op.node_name)
9951 for inst_node_name in check_nodes:
9952 self._CheckFaultyDisks(inst, inst_node_name)
9954 def Exec(self, feedback_fn):
9955 feedback_fn("Repairing storage unit '%s' on %s ..." %
9956 (self.op.name, self.op.node_name))
9958 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9959 result = self.rpc.call_storage_execute(self.op.node_name,
9960 self.op.storage_type, st_args,
9962 constants.SO_FIX_CONSISTENCY)
9963 result.Raise("Failed to repair storage unit '%s' on %s" %
9964 (self.op.name, self.op.node_name))
9967 class LUNodeEvacuate(NoHooksLU):
9968 """Evacuates instances off a list of nodes.
9973 def CheckArguments(self):
9974 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9976 def ExpandNames(self):
9977 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9979 if self.op.remote_node is not None:
9980 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9981 assert self.op.remote_node
9983 if self.op.remote_node == self.op.node_name:
9984 raise errors.OpPrereqError("Can not use evacuated node as a new"
9985 " secondary node", errors.ECODE_INVAL)
9987 if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
9988 raise errors.OpPrereqError("Without the use of an iallocator only"
9989 " secondary instances can be evacuated",
9993 self.share_locks = _ShareAll()
9994 self.needed_locks = {
9995 locking.LEVEL_INSTANCE: [],
9996 locking.LEVEL_NODEGROUP: [],
9997 locking.LEVEL_NODE: [],
10000 if self.op.remote_node is None:
10001 # Iallocator will choose any node(s) in the same group
10002 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10003 else:
10004 group_nodes = frozenset([self.op.remote_node])
10006 # Determine nodes to be locked
10007 self.lock_nodes = set([self.op.node_name]) | group_nodes
10009 def _DetermineInstances(self):
10010 """Builds list of instances to operate on.
10013 assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10015 if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10016 # Primary instances only
10017 inst_fn = _GetNodePrimaryInstances
10018 assert self.op.remote_node is None, \
10019 "Evacuating primary instances requires iallocator"
10020 elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10021 # Secondary instances only
10022 inst_fn = _GetNodeSecondaryInstances
10025 assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10026 inst_fn = _GetNodeInstances
10028 return inst_fn(self.cfg, self.op.node_name)
10030 def DeclareLocks(self, level):
10031 if level == locking.LEVEL_INSTANCE:
10032 # Lock instances optimistically, needs verification once node and group
10033 # locks have been acquired
10034 self.needed_locks[locking.LEVEL_INSTANCE] = \
10035 set(i.name for i in self._DetermineInstances())
10037 elif level == locking.LEVEL_NODEGROUP:
10038 # Lock node groups optimistically, needs verification once nodes have been acquired
10040 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10041 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10043 elif level == locking.LEVEL_NODE:
10044 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10046 def CheckPrereq(self):
10048 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10049 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10050 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10052 assert owned_nodes == self.lock_nodes
10054 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10055 if owned_groups != wanted_groups:
10056 raise errors.OpExecError("Node groups changed since locks were acquired,"
10057 " current groups are '%s', used to be '%s'" %
10058 (utils.CommaJoin(wanted_groups),
10059 utils.CommaJoin(owned_groups)))
10061 # Determine affected instances
10062 self.instances = self._DetermineInstances()
10063 self.instance_names = [i.name for i in self.instances]
10065 if set(self.instance_names) != owned_instances:
10066 raise errors.OpExecError("Instances on node '%s' changed since locks"
10067 " were acquired, current instances are '%s',"
10068 " used to be '%s'" %
10069 (self.op.node_name,
10070 utils.CommaJoin(self.instance_names),
10071 utils.CommaJoin(owned_instances)))
10073 if self.instance_names:
10074 self.LogInfo("Evacuating instances from node '%s': %s",
10076 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10078 self.LogInfo("No instances to evacuate from node '%s'",
10081 if self.op.remote_node is not None:
10082 for i in self.instances:
10083 if i.primary_node == self.op.remote_node:
10084 raise errors.OpPrereqError("Node %s is the primary node of"
10085 " instance %s, cannot use it as"
10087 (self.op.remote_node, i.name),
10088 errors.ECODE_INVAL)
10090 def Exec(self, feedback_fn):
10091 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10093 if not self.instance_names:
10094 # No instances to evacuate
10097 elif self.op.iallocator is not None:
10098 # TODO: Implement relocation to other group
10099 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10100 evac_mode=self.op.mode,
10101 instances=list(self.instance_names))
10103 ial.Run(self.op.iallocator)
10105 if not ial.success:
10106 raise errors.OpPrereqError("Can't compute node evacuation using"
10107 " iallocator '%s': %s" %
10108 (self.op.iallocator, ial.info),
10109 errors.ECODE_NORES)
10111 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10113 elif self.op.remote_node is not None:
10114 assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10115 jobs = [
10116 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10117 remote_node=self.op.remote_node,
10118 disks=[],
10119 mode=constants.REPLACE_DISK_CHG,
10120 early_release=self.op.early_release)]
10121 for instance_name in self.instance_names
10122 ]
10124 else:
10125 raise errors.ProgrammerError("No iallocator or remote node")
10127 return ResultWithJobs(jobs)
10130 def _SetOpEarlyRelease(early_release, op):
10131 """Sets C{early_release} flag on opcodes if available.
10134 try:
10135 op.early_release = early_release
10136 except AttributeError:
10137 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10139 return op
10142 def _NodeEvacDest(use_nodes, group, nodes):
10143 """Returns group or nodes depending on caller's choice.
10147 return utils.CommaJoin(nodes)
10152 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10153 """Unpacks the result of change-group and node-evacuate iallocator requests.
10155 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10156 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10158 @type lu: L{LogicalUnit}
10159 @param lu: Logical unit instance
10160 @type alloc_result: tuple/list
10161 @param alloc_result: Result from iallocator
10162 @type early_release: bool
10163 @param early_release: Whether to release locks early if possible
10164 @type use_nodes: bool
10165 @param use_nodes: Whether to display node names instead of groups
10168 (moved, failed, jobs) = alloc_result
10171 lu.LogWarning("Unable to evacuate instances %s",
10172 utils.CommaJoin("%s (%s)" % (name, reason)
10173 for (name, reason) in failed))
10176 lu.LogInfo("Instances to be moved: %s",
10177 utils.CommaJoin("%s (to %s)" %
10178 (name, _NodeEvacDest(use_nodes, group, nodes))
10179 for (name, group, nodes) in moved))
10181 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10182 map(opcodes.OpCode.LoadOpCode, ops))
10183 for ops in jobs]
10186 class LUInstanceGrowDisk(LogicalUnit):
10187 """Grow a disk of an instance.
10190 HPATH = "disk-grow"
10191 HTYPE = constants.HTYPE_INSTANCE
10194 def ExpandNames(self):
10195 self._ExpandAndLockInstance()
10196 self.needed_locks[locking.LEVEL_NODE] = []
10197 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10199 def DeclareLocks(self, level):
10200 if level == locking.LEVEL_NODE:
10201 self._LockInstancesNodes()
10203 def BuildHooksEnv(self):
10204 """Build hooks env.
10206 This runs on the master, the primary and all the secondaries.
10210 "DISK": self.op.disk,
10211 "AMOUNT": self.op.amount,
10213 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10216 def BuildHooksNodes(self):
10217 """Build hooks nodes.
10220 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10221 return (nl, nl)
10223 def CheckPrereq(self):
10224 """Check prerequisites.
10226 This checks that the instance is in the cluster.
10229 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10230 assert instance is not None, \
10231 "Cannot retrieve locked instance %s" % self.op.instance_name
10232 nodenames = list(instance.all_nodes)
10233 for node in nodenames:
10234 _CheckNodeOnline(self, node)
10236 self.instance = instance
10238 if instance.disk_template not in constants.DTS_GROWABLE:
10239 raise errors.OpPrereqError("Instance's disk layout does not support"
10240 " growing", errors.ECODE_INVAL)
10242 self.disk = instance.FindDisk(self.op.disk)
10244 if instance.disk_template not in (constants.DT_FILE,
10245 constants.DT_SHARED_FILE):
10246 # TODO: check the free disk space for file, when that feature will be supported
10248 _CheckNodesFreeDiskPerVG(self, nodenames,
10249 self.disk.ComputeGrowth(self.op.amount))
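# ComputeGrowth expresses the requirement per volume group, roughly {"xenvg": 1024}
# when growing a plain LV by 1024 MiB (sketch), which is the per-VG dict format
# _CheckNodesFreeDiskPerVG expects.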
10251 def Exec(self, feedback_fn):
10252 """Execute disk grow.
10255 instance = self.instance
10256 disk = self.disk
10258 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10259 if not disks_ok:
10260 raise errors.OpExecError("Cannot activate block device to grow")
10262 # First run all grow ops in dry-run mode
10263 for node in instance.all_nodes:
10264 self.cfg.SetDiskID(disk, node)
10265 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10266 result.Raise("Grow request failed to node %s" % node)
10268 # We know that (as far as we can test) operations across different
10269 # nodes will succeed, time to run it for real
10270 for node in instance.all_nodes:
10271 self.cfg.SetDiskID(disk, node)
10272 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10273 result.Raise("Grow request failed to node %s" % node)
10275 # TODO: Rewrite code to work properly
10276 # DRBD goes into sync mode for a short amount of time after executing the
10277 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10278 # calling "resize" in sync mode fails. Sleeping for a short amount of
10279 # time is a work-around.
10280 time.sleep(5)
10282 disk.RecordGrow(self.op.amount)
10283 self.cfg.Update(instance, feedback_fn)
10284 if self.op.wait_for_sync:
10285 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10287 self.proc.LogWarning("Disk sync-ing has not returned a good"
10288 " status; please check the instance")
10289 if not instance.admin_up:
10290 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10291 elif not instance.admin_up:
10292 self.proc.LogWarning("Not shutting down the disk even if the instance is"
10293 " not supposed to be running because no wait for"
10294 " sync mode was requested")
10297 class LUInstanceQueryData(NoHooksLU):
10298 """Query runtime instance data.
10303 def ExpandNames(self):
10304 self.needed_locks = {}
10306 # Use locking if requested or when non-static information is wanted
10307 if not (self.op.static or self.op.use_locking):
10308 self.LogWarning("Non-static data requested, locks need to be acquired")
10309 self.op.use_locking = True
10311 if self.op.instances or not self.op.use_locking:
10312 # Expand instance names right here
10313 self.wanted_names = _GetWantedInstances(self, self.op.instances)
10314 else:
10315 # Will use acquired locks
10316 self.wanted_names = None
10318 if self.op.use_locking:
10319 self.share_locks = _ShareAll()
10321 if self.wanted_names is None:
10322 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10323 else:
10324 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10326 self.needed_locks[locking.LEVEL_NODE] = []
10327 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10329 def DeclareLocks(self, level):
10330 if self.op.use_locking and level == locking.LEVEL_NODE:
10331 self._LockInstancesNodes()
10333 def CheckPrereq(self):
10334 """Check prerequisites.
10336 This only checks the optional instance list against the existing names.
10339 if self.wanted_names is None:
10340 assert self.op.use_locking, "Locking was not used"
10341 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10343 self.wanted_instances = \
10344 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10346 def _ComputeBlockdevStatus(self, node, instance_name, dev):
10347 """Returns the status of a block device
10350 if self.op.static or not node:
10353 self.cfg.SetDiskID(dev, node)
10355 result = self.rpc.call_blockdev_find(node, dev)
10359 result.Raise("Can't compute disk status for %s" % instance_name)
10361 status = result.payload
10365 return (status.dev_path, status.major, status.minor,
10366 status.sync_percent, status.estimated_time,
10367 status.is_degraded, status.ldisk_status)
10369 def _ComputeDiskStatus(self, instance, snode, dev):
10370 """Compute block device status.
10373 if dev.dev_type in constants.LDS_DRBD:
10374 # we change the snode then (otherwise we use the one passed in)
10375 if dev.logical_id[0] == instance.primary_node:
10376 snode = dev.logical_id[1]
10378 snode = dev.logical_id[0]
10380 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10381 instance.name, dev)
10382 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10384 if dev.children:
10385 dev_children = map(compat.partial(self._ComputeDiskStatus,
10386 instance, snode),
10387 dev.children)
10388 else:
10389 dev_children = []
10392 "iv_name": dev.iv_name,
10393 "dev_type": dev.dev_type,
10394 "logical_id": dev.logical_id,
10395 "physical_id": dev.physical_id,
10396 "pstatus": dev_pstatus,
10397 "sstatus": dev_sstatus,
10398 "children": dev_children,
10403 def Exec(self, feedback_fn):
10404 """Gather and return data"""
10407 cluster = self.cfg.GetClusterInfo()
10409 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10410 for i in self.wanted_instances)
10411 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10412 if self.op.static or pnode.offline:
10413 remote_state = None
10415 self.LogWarning("Primary node %s is marked offline, returning static"
10416 " information only for instance %s" %
10417 (pnode.name, instance.name))
10419 remote_info = self.rpc.call_instance_info(instance.primary_node,
10421 instance.hypervisor)
10422 remote_info.Raise("Error checking node %s" % instance.primary_node)
10423 remote_info = remote_info.payload
10424 if remote_info and "state" in remote_info:
10425 remote_state = "up"
10427 remote_state = "down"
10429 if instance.admin_up:
10430 config_state = "up"
10432 config_state = "down"
10434 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10437 result[instance.name] = {
10438 "name": instance.name,
10439 "config_state": config_state,
10440 "run_state": remote_state,
10441 "pnode": instance.primary_node,
10442 "snodes": instance.secondary_nodes,
10444 # this happens to be the same format used for hooks
10445 "nics": _NICListToTuple(self, instance.nics),
10446 "disk_template": instance.disk_template,
10448 "hypervisor": instance.hypervisor,
10449 "network_port": instance.network_port,
10450 "hv_instance": instance.hvparams,
10451 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10452 "be_instance": instance.beparams,
10453 "be_actual": cluster.FillBE(instance),
10454 "os_instance": instance.osparams,
10455 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10456 "serial_no": instance.serial_no,
10457 "mtime": instance.mtime,
10458 "ctime": instance.ctime,
10459 "uuid": instance.uuid,
10465 class LUInstanceSetParams(LogicalUnit):
10466 """Modifies an instances's parameters.
10469 HPATH = "instance-modify"
10470 HTYPE = constants.HTYPE_INSTANCE
10473 def CheckArguments(self):
10474 if not (self.op.nics or self.op.disks or self.op.disk_template or
10475 self.op.hvparams or self.op.beparams or self.op.os_name):
10476 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10478 if self.op.hvparams:
10479 _CheckGlobalHvParams(self.op.hvparams)
10483 for disk_op, disk_dict in self.op.disks:
10484 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10485 if disk_op == constants.DDM_REMOVE:
10486 disk_addremove += 1
10488 elif disk_op == constants.DDM_ADD:
10489 disk_addremove += 1
10491 if not isinstance(disk_op, int):
10492 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10493 if not isinstance(disk_dict, dict):
10494 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10495 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10497 if disk_op == constants.DDM_ADD:
10498 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10499 if mode not in constants.DISK_ACCESS_SET:
10500 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10501 errors.ECODE_INVAL)
10502 size = disk_dict.get(constants.IDISK_SIZE, None)
10504 raise errors.OpPrereqError("Required disk parameter size missing",
10505 errors.ECODE_INVAL)
10508 except (TypeError, ValueError), err:
10509 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10510 str(err), errors.ECODE_INVAL)
10511 disk_dict[constants.IDISK_SIZE] = size
10513 # modification of disk
10514 if constants.IDISK_SIZE in disk_dict:
10515 raise errors.OpPrereqError("Disk size change not possible, use"
10516 " grow-disk", errors.ECODE_INVAL)
10518 if disk_addremove > 1:
10519 raise errors.OpPrereqError("Only one disk add or remove operation"
10520 " supported at a time", errors.ECODE_INVAL)
10522 if self.op.disks and self.op.disk_template is not None:
10523 raise errors.OpPrereqError("Disk template conversion and other disk"
10524 " changes not supported at the same time",
10525 errors.ECODE_INVAL)
10527 if (self.op.disk_template and
10528 self.op.disk_template in constants.DTS_INT_MIRROR and
10529 self.op.remote_node is None):
10530 raise errors.OpPrereqError("Changing the disk template to a mirrored"
10531 " one requires specifying a secondary node",
10532 errors.ECODE_INVAL)
10536 for nic_op, nic_dict in self.op.nics:
10537 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10538 if nic_op == constants.DDM_REMOVE:
10541 elif nic_op == constants.DDM_ADD:
10544 if not isinstance(nic_op, int):
10545 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10546 if not isinstance(nic_dict, dict):
10547 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10548 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10550 # nic_dict should be a dict
10551 nic_ip = nic_dict.get(constants.INIC_IP, None)
10552 if nic_ip is not None:
10553 if nic_ip.lower() == constants.VALUE_NONE:
10554 nic_dict[constants.INIC_IP] = None
10556 if not netutils.IPAddress.IsValid(nic_ip):
10557 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10558 errors.ECODE_INVAL)
10560 nic_bridge = nic_dict.get("bridge", None)
10561 nic_link = nic_dict.get(constants.INIC_LINK, None)
10562 if nic_bridge and nic_link:
10563 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10564 " at the same time", errors.ECODE_INVAL)
10565 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10566 nic_dict["bridge"] = None
10567 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10568 nic_dict[constants.INIC_LINK] = None
10570 if nic_op == constants.DDM_ADD:
10571 nic_mac = nic_dict.get(constants.INIC_MAC, None)
10572 if nic_mac is None:
10573 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10575 if constants.INIC_MAC in nic_dict:
10576 nic_mac = nic_dict[constants.INIC_MAC]
10577 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10578 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10580 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10581 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10582 " modifying an existing nic",
10583 errors.ECODE_INVAL)
10585 if nic_addremove > 1:
10586 raise errors.OpPrereqError("Only one NIC add or remove operation"
10587 " supported at a time", errors.ECODE_INVAL)
10589 def ExpandNames(self):
10590 self._ExpandAndLockInstance()
10591 self.needed_locks[locking.LEVEL_NODE] = []
10592 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10594 def DeclareLocks(self, level):
10595 if level == locking.LEVEL_NODE:
10596 self._LockInstancesNodes()
10597 if self.op.disk_template and self.op.remote_node:
10598 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10599 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10601 def BuildHooksEnv(self):
10602 """Build hooks env.
10604 This runs on the master, primary and secondaries.
10607 args = dict()
10608 if constants.BE_MEMORY in self.be_new:
10609 args["memory"] = self.be_new[constants.BE_MEMORY]
10610 if constants.BE_VCPUS in self.be_new:
10611 args["vcpus"] = self.be_new[constants.BE_VCPUS]
10612 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10613 # information at all.
10616 nic_override = dict(self.op.nics)
10617 for idx, nic in enumerate(self.instance.nics):
10618 if idx in nic_override:
10619 this_nic_override = nic_override[idx]
10621 this_nic_override = {}
10622 if constants.INIC_IP in this_nic_override:
10623 ip = this_nic_override[constants.INIC_IP]
10626 if constants.INIC_MAC in this_nic_override:
10627 mac = this_nic_override[constants.INIC_MAC]
10630 if idx in self.nic_pnew:
10631 nicparams = self.nic_pnew[idx]
10633 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10634 mode = nicparams[constants.NIC_MODE]
10635 link = nicparams[constants.NIC_LINK]
10636 args["nics"].append((ip, mac, mode, link))
10637 if constants.DDM_ADD in nic_override:
10638 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10639 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10640 nicparams = self.nic_pnew[constants.DDM_ADD]
10641 mode = nicparams[constants.NIC_MODE]
10642 link = nicparams[constants.NIC_LINK]
10643 args["nics"].append((ip, mac, mode, link))
10644 elif constants.DDM_REMOVE in nic_override:
10645 del args["nics"][-1]
10647 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10648 if self.op.disk_template:
10649 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10653 def BuildHooksNodes(self):
10654 """Build hooks nodes.
10657 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10660 def CheckPrereq(self):
10661 """Check prerequisites.
10663 This only checks the instance list against the existing names.
10666 # checking the new params on the primary/secondary nodes
10668 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10669 cluster = self.cluster = self.cfg.GetClusterInfo()
10670 assert self.instance is not None, \
10671 "Cannot retrieve locked instance %s" % self.op.instance_name
10672 pnode = instance.primary_node
10673 nodelist = list(instance.all_nodes)
10676 if self.op.os_name and not self.op.force:
10677 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10678 self.op.force_variant)
10679 instance_os = self.op.os_name
10681 instance_os = instance.os
10683 if self.op.disk_template:
10684 if instance.disk_template == self.op.disk_template:
10685 raise errors.OpPrereqError("Instance already has disk template %s" %
10686 instance.disk_template, errors.ECODE_INVAL)
10688 if (instance.disk_template,
10689 self.op.disk_template) not in self._DISK_CONVERSIONS:
10690 raise errors.OpPrereqError("Unsupported disk template conversion from"
10691 " %s to %s" % (instance.disk_template,
10692 self.op.disk_template),
10693 errors.ECODE_INVAL)
10694 _CheckInstanceDown(self, instance, "cannot change disk template")
10695 if self.op.disk_template in constants.DTS_INT_MIRROR:
10696 if self.op.remote_node == pnode:
10697 raise errors.OpPrereqError("Given new secondary node %s is the same"
10698 " as the primary node of the instance" %
10699 self.op.remote_node, errors.ECODE_STATE)
10700 _CheckNodeOnline(self, self.op.remote_node)
10701 _CheckNodeNotDrained(self, self.op.remote_node)
10702 # FIXME: here we assume that the old instance type is DT_PLAIN
10703 assert instance.disk_template == constants.DT_PLAIN
10704 disks = [{constants.IDISK_SIZE: d.size,
10705 constants.IDISK_VG: d.logical_id[0]}
10706 for d in instance.disks]
10707 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10708 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10710 # hvparams processing
10711 if self.op.hvparams:
10712 hv_type = instance.hypervisor
10713 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10714 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10715 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10718 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10719 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10720 self.hv_new = hv_new # the new actual values
10721 self.hv_inst = i_hvdict # the new dict (without defaults)
10723 self.hv_new = self.hv_inst = {}
10725 # beparams processing
10726 if self.op.beparams:
10727 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10729 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10730 be_new = cluster.SimpleFillBE(i_bedict)
10731 self.be_new = be_new # the new actual values
10732 self.be_inst = i_bedict # the new dict (without defaults)
10734 self.be_new = self.be_inst = {}
10735 be_old = cluster.FillBE(instance)
10737 # osparams processing
10738 if self.op.osparams:
10739 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10740 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10741 self.os_inst = i_osdict # the new dict (without defaults)
10742 else:
10743 self.os_inst = {}
10745 self.warn = []
10747 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10748 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10749 mem_check_list = [pnode]
10750 if be_new[constants.BE_AUTO_BALANCE]:
10751 # either we changed auto_balance to yes or it was from before
10752 mem_check_list.extend(instance.secondary_nodes)
10753 instance_info = self.rpc.call_instance_info(pnode, instance.name,
10754 instance.hypervisor)
10755 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10756 instance.hypervisor)
10757 pninfo = nodeinfo[pnode]
10758 msg = pninfo.fail_msg
10760 # Assume the primary node is unreachable and go ahead
10761 self.warn.append("Can't get info from primary node %s: %s" %
10763 elif not isinstance(pninfo.payload.get("memory_free", None), int):
10764 self.warn.append("Node data from primary node %s doesn't contain"
10765 " free memory information" % pnode)
10766 elif instance_info.fail_msg:
10767 self.warn.append("Can't get instance runtime information: %s" %
10768 instance_info.fail_msg)
10770 if instance_info.payload:
10771 current_mem = int(instance_info.payload["memory"])
10773 # Assume instance not running
10774 # (there is a slight race condition here, but it's not very probable,
10775 # and we have no other way to check)
10777 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10778 pninfo.payload["memory_free"])
10780 raise errors.OpPrereqError("This change will prevent the instance"
10781 " from starting, due to %d MB of memory"
10782 " missing on its primary node" % miss_mem,
10783 errors.ECODE_NORES)
10785 if be_new[constants.BE_AUTO_BALANCE]:
10786 for node, nres in nodeinfo.items():
10787 if node not in instance.secondary_nodes:
10789 nres.Raise("Can't get info from secondary node %s" % node,
10790 prereq=True, ecode=errors.ECODE_STATE)
10791 if not isinstance(nres.payload.get("memory_free", None), int):
10792 raise errors.OpPrereqError("Secondary node %s didn't return free"
10793 " memory information" % node,
10794 errors.ECODE_STATE)
10795 elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10796 raise errors.OpPrereqError("This change will prevent the instance"
10797 " from failover to its secondary node"
10798 " %s, due to not enough memory" % node,
10799 errors.ECODE_STATE)
10803 self.nic_pinst = {}
10804 for nic_op, nic_dict in self.op.nics:
10805 if nic_op == constants.DDM_REMOVE:
10806 if not instance.nics:
10807 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10808 errors.ECODE_INVAL)
10810 if nic_op != constants.DDM_ADD:
10812 if not instance.nics:
10813 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10814 " no NICs" % nic_op,
10815 errors.ECODE_INVAL)
10816 if nic_op < 0 or nic_op >= len(instance.nics):
10817 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10819 (nic_op, len(instance.nics) - 1),
10820 errors.ECODE_INVAL)
10821 old_nic_params = instance.nics[nic_op].nicparams
10822 old_nic_ip = instance.nics[nic_op].ip
10824 old_nic_params = {}
10827 update_params_dict = dict([(key, nic_dict[key])
10828 for key in constants.NICS_PARAMETERS
10829 if key in nic_dict])
10831 if "bridge" in nic_dict:
10832 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10834 new_nic_params = _GetUpdatedParams(old_nic_params,
10835 update_params_dict)
10836 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10837 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10838 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10839 self.nic_pinst[nic_op] = new_nic_params
10840 self.nic_pnew[nic_op] = new_filled_nic_params
10841 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10843 if new_nic_mode == constants.NIC_MODE_BRIDGED:
10844 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10845 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10847 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10849 self.warn.append(msg)
10851 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10852 if new_nic_mode == constants.NIC_MODE_ROUTED:
10853 if constants.INIC_IP in nic_dict:
10854 nic_ip = nic_dict[constants.INIC_IP]
10856 nic_ip = old_nic_ip
10858 raise errors.OpPrereqError("Cannot set the nic ip to None"
10859 " on a routed nic", errors.ECODE_INVAL)
10860 if constants.INIC_MAC in nic_dict:
10861 nic_mac = nic_dict[constants.INIC_MAC]
10862 if nic_mac is None:
10863 raise errors.OpPrereqError("Cannot set the nic mac to None",
10864 errors.ECODE_INVAL)
10865 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10866 # otherwise generate the mac
10867 nic_dict[constants.INIC_MAC] = \
10868 self.cfg.GenerateMAC(self.proc.GetECId())
10870 # or validate/reserve the current one
10872 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10873 except errors.ReservationError:
10874 raise errors.OpPrereqError("MAC address %s already in use"
10875 " in cluster" % nic_mac,
10876 errors.ECODE_NOTUNIQUE)
10879 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10880 raise errors.OpPrereqError("Disk operations not supported for"
10881 " diskless instances",
10882 errors.ECODE_INVAL)
10883 for disk_op, _ in self.op.disks:
10884 if disk_op == constants.DDM_REMOVE:
10885 if len(instance.disks) == 1:
10886 raise errors.OpPrereqError("Cannot remove the last disk of"
10887 " an instance", errors.ECODE_INVAL)
10888 _CheckInstanceDown(self, instance, "cannot remove disks")
10890 if (disk_op == constants.DDM_ADD and
10891 len(instance.disks) >= constants.MAX_DISKS):
10892 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10893 " add more" % constants.MAX_DISKS,
10894 errors.ECODE_STATE)
10895 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10897 if disk_op < 0 or disk_op >= len(instance.disks):
10898 raise errors.OpPrereqError("Invalid disk index %s, valid values"
10900 (disk_op, len(instance.disks)),
10901 errors.ECODE_INVAL)
10905 def _ConvertPlainToDrbd(self, feedback_fn):
10906 """Converts an instance from plain to drbd.
10909 feedback_fn("Converting template to drbd")
10910 instance = self.instance
10911 pnode = instance.primary_node
10912 snode = self.op.remote_node
10914 # create a fake disk info for _GenerateDiskTemplate
10915 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10916 constants.IDISK_VG: d.logical_id[0]}
10917 for d in instance.disks]
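# With the usual constant values this yields entries of the form (sketch)
# {"size": <MiB>, "mode": "rw", "vg": "xenvg"}, one per existing plain disk,
# matching the disk_info argument _GenerateDiskTemplate expects.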
10918 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10919 instance.name, pnode, [snode],
10920 disk_info, None, None, 0, feedback_fn)
10921 info = _GetInstanceInfoText(instance)
10922 feedback_fn("Creating aditional volumes...")
10923 # first, create the missing data and meta devices
10924 for disk in new_disks:
10925 # unfortunately this is... not too nice
10926 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10928 for child in disk.children:
10929 _CreateSingleBlockDev(self, snode, instance, child, info, True)
10930 # at this stage, all new LVs have been created, we can rename the old ones
10932 feedback_fn("Renaming original volumes...")
10933 rename_list = [(o, n.children[0].logical_id)
10934 for (o, n) in zip(instance.disks, new_disks)]
10935 result = self.rpc.call_blockdev_rename(pnode, rename_list)
10936 result.Raise("Failed to rename original LVs")
10938 feedback_fn("Initializing DRBD devices...")
10939 # all child devices are in place, we can now create the DRBD devices
10940 for disk in new_disks:
10941 for node in [pnode, snode]:
10942 f_create = node == pnode
10943 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10945 # at this point, the instance has been modified
10946 instance.disk_template = constants.DT_DRBD8
10947 instance.disks = new_disks
10948 self.cfg.Update(instance, feedback_fn)
10950 # disks are created, waiting for sync
10951 disk_abort = not _WaitForSync(self, instance,
10952 oneshot=not self.op.wait_for_sync)
10954 raise errors.OpExecError("There are some degraded disks for"
10955 " this instance, please cleanup manually")
10957 def _ConvertDrbdToPlain(self, feedback_fn):
10958 """Converts an instance from drbd to plain.
10961 instance = self.instance
10962 assert len(instance.secondary_nodes) == 1
10963 pnode = instance.primary_node
10964 snode = instance.secondary_nodes[0]
10965 feedback_fn("Converting template to plain")
10967 old_disks = instance.disks
10968 new_disks = [d.children[0] for d in old_disks]
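# For a DRBD8 disk, children[0] is the data LV and children[1] the metadata LV,
# so keeping only children[0] turns each disk into a plain LV.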
10970 # copy over size and mode
10971 for parent, child in zip(old_disks, new_disks):
10972 child.size = parent.size
10973 child.mode = parent.mode
10975 # update instance structure
10976 instance.disks = new_disks
10977 instance.disk_template = constants.DT_PLAIN
10978 self.cfg.Update(instance, feedback_fn)
10980 feedback_fn("Removing volumes on the secondary node...")
10981 for disk in old_disks:
10982 self.cfg.SetDiskID(disk, snode)
10983 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10985 self.LogWarning("Could not remove block device %s on node %s,"
10986 " continuing anyway: %s", disk.iv_name, snode, msg)
10988 feedback_fn("Removing unneeded volumes on the primary node...")
10989 for idx, disk in enumerate(old_disks):
10990 meta = disk.children[1]
10991 self.cfg.SetDiskID(meta, pnode)
10992 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10994 self.LogWarning("Could not remove metadata for disk %d on node %s,"
10995 " continuing anyway: %s", idx, pnode, msg)
10997 def Exec(self, feedback_fn):
10998 """Modifies an instance.
11000 All parameters take effect only at the next restart of the instance.
11003 # Process here the warnings from CheckPrereq, as we don't have a
11004 # feedback_fn there.
11005 for warn in self.warn:
11006 feedback_fn("WARNING: %s" % warn)
11008 result = []
11009 instance = self.instance
11011 for disk_op, disk_dict in self.op.disks:
11012 if disk_op == constants.DDM_REMOVE:
11013 # remove the last disk
11014 device = instance.disks.pop()
11015 device_idx = len(instance.disks)
11016 for node, disk in device.ComputeNodeTree(instance.primary_node):
11017 self.cfg.SetDiskID(disk, node)
11018 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11020 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11021 " continuing anyway", device_idx, node, msg)
11022 result.append(("disk/%d" % device_idx, "remove"))
11023 elif disk_op == constants.DDM_ADD:
11025 if instance.disk_template in (constants.DT_FILE,
11026 constants.DT_SHARED_FILE):
11027 file_driver, file_path = instance.disks[0].logical_id
11028 file_path = os.path.dirname(file_path)
11030 file_driver = file_path = None
11031 disk_idx_base = len(instance.disks)
11032 new_disk = _GenerateDiskTemplate(self,
11033 instance.disk_template,
11034 instance.name, instance.primary_node,
11035 instance.secondary_nodes,
11039 disk_idx_base, feedback_fn)[0]
11040 instance.disks.append(new_disk)
11041 info = _GetInstanceInfoText(instance)
11043 logging.info("Creating volume %s for instance %s",
11044 new_disk.iv_name, instance.name)
11045 # Note: this needs to be kept in sync with _CreateDisks
11047 for node in instance.all_nodes:
11048 f_create = node == instance.primary_node
11050 _CreateBlockDev(self, node, instance, new_disk,
11051 f_create, info, f_create)
11052 except errors.OpExecError, err:
11053 self.LogWarning("Failed to create volume %s (%s) on"
11055 new_disk.iv_name, new_disk, node, err)
11056 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11057 (new_disk.size, new_disk.mode)))
11059 # change a given disk
11060 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11061 result.append(("disk.mode/%d" % disk_op,
11062 disk_dict[constants.IDISK_MODE]))
11064 if self.op.disk_template:
11065 r_shut = _ShutdownInstanceDisks(self, instance)
11067 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11068 " proceed with disk template conversion")
11069 mode = (instance.disk_template, self.op.disk_template)
11071 self._DISK_CONVERSIONS[mode](self, feedback_fn)
11073 self.cfg.ReleaseDRBDMinors(instance.name)
11075 result.append(("disk_template", self.op.disk_template))
11078 for nic_op, nic_dict in self.op.nics:
11079 if nic_op == constants.DDM_REMOVE:
11080 # remove the last nic
11081 del instance.nics[-1]
11082 result.append(("nic.%d" % len(instance.nics), "remove"))
11083 elif nic_op == constants.DDM_ADD:
11084 # mac and bridge should be set by now
11085 mac = nic_dict[constants.INIC_MAC]
11086 ip = nic_dict.get(constants.INIC_IP, None)
11087 nicparams = self.nic_pinst[constants.DDM_ADD]
11088 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11089 instance.nics.append(new_nic)
11090 result.append(("nic.%d" % (len(instance.nics) - 1),
11091 "add:mac=%s,ip=%s,mode=%s,link=%s" %
11092 (new_nic.mac, new_nic.ip,
11093 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11094 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11097 for key in (constants.INIC_MAC, constants.INIC_IP):
11098 if key in nic_dict:
11099 setattr(instance.nics[nic_op], key, nic_dict[key])
11100 if nic_op in self.nic_pinst:
11101 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11102 for key, val in nic_dict.iteritems():
11103 result.append(("nic.%s/%d" % (key, nic_op), val))
11106 if self.op.hvparams:
11107 instance.hvparams = self.hv_inst
11108 for key, val in self.op.hvparams.iteritems():
11109 result.append(("hv/%s" % key, val))
11112 if self.op.beparams:
11113 instance.beparams = self.be_inst
11114 for key, val in self.op.beparams.iteritems():
11115 result.append(("be/%s" % key, val))
11118 if self.op.os_name:
11119 instance.os = self.op.os_name
11122 if self.op.osparams:
11123 instance.osparams = self.os_inst
11124 for key, val in self.op.osparams.iteritems():
11125 result.append(("os/%s" % key, val))
11127 self.cfg.Update(instance, feedback_fn)
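# Illustrative sketch (not part of the LU): Exec() reports its changes as a
# list of (parameter, new value) pairs; a run that added a disk, changed the
# memory and converted the disk template might return something shaped like
# the hypothetical list below (all values invented).
_EXAMPLE_SETPARAMS_RESULT = [
  ("disk/1", "add:size=1024,mode=rw"),
  ("be/memory", 512),
  ("disk_template", "drbd"),
]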
11131 _DISK_CONVERSIONS = {
11132 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11133 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11137 class LUInstanceChangeGroup(LogicalUnit):
11138 HPATH = "instance-change-group"
11139 HTYPE = constants.HTYPE_INSTANCE
11142 def ExpandNames(self):
11143 self.share_locks = _ShareAll()
11144 self.needed_locks = {
11145 locking.LEVEL_NODEGROUP: [],
11146 locking.LEVEL_NODE: [],
11149 self._ExpandAndLockInstance()
11151 if self.op.target_groups:
11152 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11153 self.op.target_groups)
11155 self.req_target_uuids = None
11157 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11159 def DeclareLocks(self, level):
11160 if level == locking.LEVEL_NODEGROUP:
11161 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11163 if self.req_target_uuids:
11164 lock_groups = set(self.req_target_uuids)
11166 # Lock all groups used by instance optimistically; this requires going
11167 # via the node before it's locked, requiring verification later on
11168 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11169 lock_groups.update(instance_groups)
11171 # No target groups, need to lock all of them
11172 lock_groups = locking.ALL_SET
11174 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11176 elif level == locking.LEVEL_NODE:
11177 if self.req_target_uuids:
11178 # Lock all nodes used by instances
11179 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11180 self._LockInstancesNodes()
11182 # Lock all nodes in all potential target groups
11183 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11184 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11185 member_nodes = [node_name
11186 for group in lock_groups
11187 for node_name in self.cfg.GetNodeGroup(group).members]
11188 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11190 # Lock all nodes as all groups are potential targets
11191 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11193 def CheckPrereq(self):
11194 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11195 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11196 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11198 assert (self.req_target_uuids is None or
11199 owned_groups.issuperset(self.req_target_uuids))
11200 assert owned_instances == set([self.op.instance_name])
11202 # Get instance information
11203 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11205 # Check if node groups for locked instance are still correct
11206 assert owned_nodes.issuperset(self.instance.all_nodes), \
11207 ("Instance %s's nodes changed while we kept the lock" %
11208 self.op.instance_name)
11210 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11213 if self.req_target_uuids:
11214 # User requested specific target groups
11215 self.target_uuids = self.req_target_uuids
11217 # All groups except those used by the instance are potential targets
11218 self.target_uuids = owned_groups - inst_groups
11220 conflicting_groups = self.target_uuids & inst_groups
11221 if conflicting_groups:
11222 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11223 " used by the instance '%s'" %
11224 (utils.CommaJoin(conflicting_groups),
11225 self.op.instance_name),
11226 errors.ECODE_INVAL)
11228 if not self.target_uuids:
11229 raise errors.OpPrereqError("There are no possible target groups",
11230 errors.ECODE_INVAL)
11232 def BuildHooksEnv(self):
11233 """Build hooks env.
11236 assert self.target_uuids
11239 "TARGET_GROUPS": " ".join(self.target_uuids),
11242 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11246 def BuildHooksNodes(self):
11247 """Build hooks nodes.
11250 mn = self.cfg.GetMasterNode()
11251 return ([mn], [mn])
11253 def Exec(self, feedback_fn):
11254 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11256 assert instances == [self.op.instance_name], "Instance not locked"
11258 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11259 instances=instances, target_groups=list(self.target_uuids))
11261 ial.Run(self.op.iallocator)
11263 if not ial.success:
11264 raise errors.OpPrereqError("Can't compute solution for changing group of"
11265 " instance '%s' using iallocator '%s': %s" %
11266 (self.op.instance_name, self.op.iallocator,
11268 errors.ECODE_NORES)
11270 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11272 self.LogInfo("Iallocator returned %s job(s) for changing group of"
11273 " instance '%s'", len(jobs), self.op.instance_name)
11275 return ResultWithJobs(jobs)
11278 class LUBackupQuery(NoHooksLU):
11279 """Query the exports list
11284 def ExpandNames(self):
11285 self.needed_locks = {}
11286 self.share_locks[locking.LEVEL_NODE] = 1
11287 if not self.op.nodes:
11288 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11290 self.needed_locks[locking.LEVEL_NODE] = \
11291 _GetWantedNodes(self, self.op.nodes)
11293 def Exec(self, feedback_fn):
11294 """Compute the list of all the exported system images.
11297 @return: a dictionary with the structure node->(export-list)
11298 where export-list is a list of the instances exported on that node
11302 self.nodes = self.owned_locks(locking.LEVEL_NODE)
11303 rpcresult = self.rpc.call_export_list(self.nodes)
11305 for node in rpcresult:
11306 if rpcresult[node].fail_msg:
11307 result[node] = False
11309 result[node] = rpcresult[node].payload
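# Illustrative sketch (hypothetical node/instance names) of the mapping
# built above: nodes whose RPC failed map to False, the others map to the
# list of instance names whose exports they currently hold.
_EXAMPLE_EXPORT_LIST = {
  "node1.example.com": ["inst1.example.com", "inst2.example.com"],
  "node2.example.com": False,
}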
11314 class LUBackupPrepare(NoHooksLU):
11315 """Prepares an instance for an export and returns useful information.
11320 def ExpandNames(self):
11321 self._ExpandAndLockInstance()
11323 def CheckPrereq(self):
11324 """Check prerequisites.
11327 instance_name = self.op.instance_name
11329 self.instance = self.cfg.GetInstanceInfo(instance_name)
11330 assert self.instance is not None, \
11331 "Cannot retrieve locked instance %s" % self.op.instance_name
11332 _CheckNodeOnline(self, self.instance.primary_node)
11334 self._cds = _GetClusterDomainSecret()
11336 def Exec(self, feedback_fn):
11337 """Prepares an instance for an export.
11340 instance = self.instance
11342 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11343 salt = utils.GenerateSecret(8)
11345 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11346 result = self.rpc.call_x509_cert_create(instance.primary_node,
11347 constants.RIE_CERT_VALIDITY)
11348 result.Raise("Can't create X509 key and certificate on %s" % result.node)
11350 (name, cert_pem) = result.payload
11352 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11356 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11357 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11359 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11365 class LUBackupExport(LogicalUnit):
11366 """Export an instance to an image in the cluster.
11369 HPATH = "instance-export"
11370 HTYPE = constants.HTYPE_INSTANCE
11373 def CheckArguments(self):
11374 """Check the arguments.
11377 self.x509_key_name = self.op.x509_key_name
11378 self.dest_x509_ca_pem = self.op.destination_x509_ca
11380 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11381 if not self.x509_key_name:
11382 raise errors.OpPrereqError("Missing X509 key name for encryption",
11383 errors.ECODE_INVAL)
11385 if not self.dest_x509_ca_pem:
11386 raise errors.OpPrereqError("Missing destination X509 CA",
11387 errors.ECODE_INVAL)
11389 def ExpandNames(self):
11390 self._ExpandAndLockInstance()
11392 # Lock all nodes for local exports
11393 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11394 # FIXME: lock only instance primary and destination node
11396 # Sad but true, for now we have to lock all nodes, as we don't know where
11397 # the previous export might be, and in this LU we search for it and
11398 # remove it from its current node. In the future we could fix this by:
11399 # - making a tasklet to search (share-lock all), then create the
11400 # new one, then one to remove, after
11401 # - removing the removal operation altogether
11402 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11404 def DeclareLocks(self, level):
11405 """Last minute lock declaration."""
11406 # All nodes are locked anyway, so nothing to do here.
11408 def BuildHooksEnv(self):
11409 """Build hooks env.
11411 This will run on the master, primary node and target node.
11415 "EXPORT_MODE": self.op.mode,
11416 "EXPORT_NODE": self.op.target_node,
11417 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11418 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11419 # TODO: Generic function for boolean env variables
11420 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11423 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11427 def BuildHooksNodes(self):
11428 """Build hooks nodes.
11431 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11433 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11434 nl.append(self.op.target_node)
11438 def CheckPrereq(self):
11439 """Check prerequisites.
11441 This checks that the instance and node names are valid.
11444 instance_name = self.op.instance_name
11446 self.instance = self.cfg.GetInstanceInfo(instance_name)
11447 assert self.instance is not None, \
11448 "Cannot retrieve locked instance %s" % self.op.instance_name
11449 _CheckNodeOnline(self, self.instance.primary_node)
11451 if (self.op.remove_instance and self.instance.admin_up and
11452 not self.op.shutdown):
11453 raise errors.OpPrereqError("Can not remove instance without shutting it"
11456 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11457 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11458 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11459 assert self.dst_node is not None
11461 _CheckNodeOnline(self, self.dst_node.name)
11462 _CheckNodeNotDrained(self, self.dst_node.name)
11465 self.dest_disk_info = None
11466 self.dest_x509_ca = None
11468 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11469 self.dst_node = None
11471 if len(self.op.target_node) != len(self.instance.disks):
11472 raise errors.OpPrereqError(("Received destination information for %s"
11473 " disks, but instance %s has %s disks") %
11474 (len(self.op.target_node), instance_name,
11475 len(self.instance.disks)),
11476 errors.ECODE_INVAL)
11478 cds = _GetClusterDomainSecret()
11480 # Check X509 key name
11482 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11483 except (TypeError, ValueError), err:
11484 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11486 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11487 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11488 errors.ECODE_INVAL)
11490 # Load and verify CA
11492 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11493 except OpenSSL.crypto.Error, err:
11494 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11495 (err, ), errors.ECODE_INVAL)
11497 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11498 if errcode is not None:
11499 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11500 (msg, ), errors.ECODE_INVAL)
11502 self.dest_x509_ca = cert
11504 # Verify target information
11506 for idx, disk_data in enumerate(self.op.target_node):
11508 (host, port, magic) = \
11509 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11510 except errors.GenericError, err:
11511 raise errors.OpPrereqError("Target info for disk %s: %s" %
11512 (idx, err), errors.ECODE_INVAL)
11514 disk_info.append((host, port, magic))
11516 assert len(disk_info) == len(self.op.target_node)
11517 self.dest_disk_info = disk_info
11520 raise errors.ProgrammerError("Unhandled export mode %r" %
11523 # instance disk type verification
11524 # TODO: Implement export support for file-based disks
11525 for disk in self.instance.disks:
11526 if disk.dev_type == constants.LD_FILE:
11527 raise errors.OpPrereqError("Export not supported for instances with"
11528 " file-based disks", errors.ECODE_INVAL)
11530 def _CleanupExports(self, feedback_fn):
11531 """Removes exports of current instance from all other nodes.
11533 If an instance in a cluster with nodes A..D was exported to node C, its
11534 exports will be removed from the nodes A, B and D.
11537 assert self.op.mode != constants.EXPORT_MODE_REMOTE
11539 nodelist = self.cfg.GetNodeList()
11540 nodelist.remove(self.dst_node.name)
11542 # on one-node clusters nodelist will be empty after the removal
11543 # if we proceed the backup would be removed because OpBackupQuery
11544 # substitutes an empty list with the full cluster node list.
11545 iname = self.instance.name
11547 feedback_fn("Removing old exports for instance %s" % iname)
11548 exportlist = self.rpc.call_export_list(nodelist)
11549 for node in exportlist:
11550 if exportlist[node].fail_msg:
11552 if iname in exportlist[node].payload:
11553 msg = self.rpc.call_export_remove(node, iname).fail_msg
11555 self.LogWarning("Could not remove older export for instance %s"
11556 " on node %s: %s", iname, node, msg)
11558 def Exec(self, feedback_fn):
11559 """Export an instance to an image in the cluster.
11562 assert self.op.mode in constants.EXPORT_MODES
11564 instance = self.instance
11565 src_node = instance.primary_node
11567 if self.op.shutdown:
11568 # shutdown the instance, but not the disks
11569 feedback_fn("Shutting down instance %s" % instance.name)
11570 result = self.rpc.call_instance_shutdown(src_node, instance,
11571 self.op.shutdown_timeout)
11572 # TODO: Maybe ignore failures if ignore_remove_failures is set
11573 result.Raise("Could not shutdown instance %s on"
11574 " node %s" % (instance.name, src_node))
11576 # set the disks ID correctly since call_instance_start needs the
11577 # correct drbd minor to create the symlinks
11578 for disk in instance.disks:
11579 self.cfg.SetDiskID(disk, src_node)
11581 activate_disks = (not instance.admin_up)
11584 # Activate the instance disks if we're exporting a stopped instance
11585 feedback_fn("Activating disks for %s" % instance.name)
11586 _StartInstanceDisks(self, instance, None)
11589 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11592 helper.CreateSnapshots()
11594 if (self.op.shutdown and instance.admin_up and
11595 not self.op.remove_instance):
11596 assert not activate_disks
11597 feedback_fn("Starting instance %s" % instance.name)
11598 result = self.rpc.call_instance_start(src_node, instance,
11600 msg = result.fail_msg
11602 feedback_fn("Failed to start instance: %s" % msg)
11603 _ShutdownInstanceDisks(self, instance)
11604 raise errors.OpExecError("Could not start instance: %s" % msg)
11606 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11607 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11608 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11609 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11610 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11612 (key_name, _, _) = self.x509_key_name
11615 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11618 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11619 key_name, dest_ca_pem,
11624 # Check for backwards compatibility
11625 assert len(dresults) == len(instance.disks)
11626 assert compat.all(isinstance(i, bool) for i in dresults), \
11627 "Not all results are boolean: %r" % dresults
11631 feedback_fn("Deactivating disks for %s" % instance.name)
11632 _ShutdownInstanceDisks(self, instance)
11634 if not (compat.all(dresults) and fin_resu):
11637 failures.append("export finalization")
11638 if not compat.all(dresults):
11639 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11641 failures.append("disk export: disk(s) %s" % fdsk)
11643 raise errors.OpExecError("Export failed, errors in %s" %
11644 utils.CommaJoin(failures))
11646 # At this point, the export was successful, we can cleanup/finish
11648 # Remove instance if requested
11649 if self.op.remove_instance:
11650 feedback_fn("Removing instance %s" % instance.name)
11651 _RemoveInstance(self, feedback_fn, instance,
11652 self.op.ignore_remove_failures)
11654 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11655 self._CleanupExports(feedback_fn)
11657 return fin_resu, dresults
11660 class LUBackupRemove(NoHooksLU):
11661 """Remove exports related to the named instance.
11666 def ExpandNames(self):
11667 self.needed_locks = {}
11668 # We need all nodes to be locked in order for RemoveExport to work, but we
11669 # don't need to lock the instance itself, as nothing will happen to it (and
11670 # we can remove exports also for a removed instance)
11671 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11673 def Exec(self, feedback_fn):
11674 """Remove any export.
11677 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11678 # If the instance was not found we'll try with the name that was passed in.
11679 # This will only work if it was an FQDN, though.
11681 if not instance_name:
11683 instance_name = self.op.instance_name
11685 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11686 exportlist = self.rpc.call_export_list(locked_nodes)
11688 for node in exportlist:
11689 msg = exportlist[node].fail_msg
11691 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11693 if instance_name in exportlist[node].payload:
11695 result = self.rpc.call_export_remove(node, instance_name)
11696 msg = result.fail_msg
11698 logging.error("Could not remove export for instance %s"
11699 " on node %s: %s", instance_name, node, msg)
11701 if fqdn_warn and not found:
11702 feedback_fn("Export not found. If trying to remove an export belonging"
11703 " to a deleted instance please use its Fully Qualified"
11707 class LUGroupAdd(LogicalUnit):
11708 """Logical unit for creating node groups.
11711 HPATH = "group-add"
11712 HTYPE = constants.HTYPE_GROUP
11715 def ExpandNames(self):
11716 # We need the new group's UUID here so that we can create and acquire the
11717 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11718 # that it should not check whether the UUID exists in the configuration.
11719 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11720 self.needed_locks = {}
11721 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11723 def CheckPrereq(self):
11724 """Check prerequisites.
11726 This checks that the given group name is not an existing node group
11731 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11732 except errors.OpPrereqError:
11735 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11736 " node group (UUID: %s)" %
11737 (self.op.group_name, existing_uuid),
11738 errors.ECODE_EXISTS)
11740 if self.op.ndparams:
11741 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11743 def BuildHooksEnv(self):
11744 """Build hooks env.
11748 "GROUP_NAME": self.op.group_name,
11751 def BuildHooksNodes(self):
11752 """Build hooks nodes.
11755 mn = self.cfg.GetMasterNode()
11756 return ([mn], [mn])
11758 def Exec(self, feedback_fn):
11759 """Add the node group to the cluster.
11762 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11763 uuid=self.group_uuid,
11764 alloc_policy=self.op.alloc_policy,
11765 ndparams=self.op.ndparams)
11767 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11768 del self.remove_locks[locking.LEVEL_NODEGROUP]
11771 class LUGroupAssignNodes(NoHooksLU):
11772 """Logical unit for assigning nodes to groups.
11777 def ExpandNames(self):
11778 # These raise errors.OpPrereqError on their own:
11779 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11780 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11782 # We want to lock all the affected nodes and groups. We have readily
11783 # available the list of nodes, and the *destination* group. To gather the
11784 # list of "source" groups, we need to fetch node information later on.
11785 self.needed_locks = {
11786 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11787 locking.LEVEL_NODE: self.op.nodes,
11790 def DeclareLocks(self, level):
11791 if level == locking.LEVEL_NODEGROUP:
11792 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11794 # Try to get all affected nodes' groups without having the group or node
11795 # lock yet. Needs verification later in the code flow.
11796 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11798 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11800 def CheckPrereq(self):
11801 """Check prerequisites.
11804 assert self.needed_locks[locking.LEVEL_NODEGROUP]
11805 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
11806 frozenset(self.op.nodes))
11808 expected_locks = (set([self.group_uuid]) |
11809 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11810 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
11811 if actual_locks != expected_locks:
11812 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11813 " current groups are '%s', used to be '%s'" %
11814 (utils.CommaJoin(expected_locks),
11815 utils.CommaJoin(actual_locks)))
11817 self.node_data = self.cfg.GetAllNodesInfo()
11818 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11819 instance_data = self.cfg.GetAllInstancesInfo()
11821 if self.group is None:
11822 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11823 (self.op.group_name, self.group_uuid))
11825 (new_splits, previous_splits) = \
11826 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11827 for node in self.op.nodes],
11828 self.node_data, instance_data)
11831 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11833 if not self.op.force:
11834 raise errors.OpExecError("The following instances get split by this"
11835 " change and --force was not given: %s" %
11838 self.LogWarning("This operation will split the following instances: %s",
11841 if previous_splits:
11842 self.LogWarning("In addition, these already-split instances continue"
11843 " to be split across groups: %s",
11844 utils.CommaJoin(utils.NiceSort(previous_splits)))
11846 def Exec(self, feedback_fn):
11847 """Assign nodes to a new group.
11850 for node in self.op.nodes:
11851 self.node_data[node].group = self.group_uuid
11853 # FIXME: Depends on side-effects of modifying the result of
11854 # C{cfg.GetAllNodesInfo}
11856 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11859 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11860 """Check for split instances after a node assignment.
11862 This method considers a series of node assignments as an atomic operation,
11863 and returns information about split instances after applying the set of changes.
11866 In particular, it returns information about newly split instances, and about
11867 instances that were already split and remain so after the change.
11869 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are considered.
11872 @type changes: list of (node_name, new_group_uuid) pairs.
11873 @param changes: list of node assignments to consider.
11874 @param node_data: a dict with data for all nodes
11875 @param instance_data: a dict with all instances to consider
11876 @rtype: a two-tuple
11877 @return: a list of instances that were previously healthy and end up split as a
11878 consequence of this change, and a list of instances that were previously
11879 split and that this change does not fix.
11882 changed_nodes = dict((node, group) for node, group in changes
11883 if node_data[node].group != group)
11885 all_split_instances = set()
11886 previously_split_instances = set()
11888 def InstanceNodes(instance):
11889 return [instance.primary_node] + list(instance.secondary_nodes)
11891 for inst in instance_data.values():
11892 if inst.disk_template not in constants.DTS_INT_MIRROR:
11895 instance_nodes = InstanceNodes(inst)
11897 if len(set(node_data[node].group for node in instance_nodes)) > 1:
11898 previously_split_instances.add(inst.name)
11900 if len(set(changed_nodes.get(node, node_data[node].group)
11901 for node in instance_nodes)) > 1:
11902 all_split_instances.add(inst.name)
11904 return (list(all_split_instances - previously_split_instances),
11905 list(previously_split_instances & all_split_instances))
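# Worked example (standalone sketch with simplified stand-in objects, not
# ganeti ones): moving node "n2" into group "g2" splits a mirrored instance
# whose nodes are n1/n2, because its nodes then span two groups.
import collections

_Node = collections.namedtuple("_Node", ["group"])
_Inst = collections.namedtuple("_Inst", ["primary_node", "secondary_nodes"])

_node_data = {"n1": _Node("g1"), "n2": _Node("g1")}
_inst = _Inst("n1", ["n2"])
_changes = [("n2", "g2")]

_changed_nodes = dict((n, g) for (n, g) in _changes
                      if _node_data[n].group != g)
_groups_after = set(_changed_nodes.get(n, _node_data[n].group)
                    for n in [_inst.primary_node] + list(_inst.secondary_nodes))
assert len(_groups_after) > 1  # the instance would become split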
11908 class _GroupQuery(_QueryBase):
11909 FIELDS = query.GROUP_FIELDS
11911 def ExpandNames(self, lu):
11912 lu.needed_locks = {}
11914 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11915 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11918 self.wanted = [name_to_uuid[name]
11919 for name in utils.NiceSort(name_to_uuid.keys())]
11921 # Accept entries that are either group names or UUIDs.
11924 all_uuid = frozenset(self._all_groups.keys())
11926 for name in self.names:
11927 if name in all_uuid:
11928 self.wanted.append(name)
11929 elif name in name_to_uuid:
11930 self.wanted.append(name_to_uuid[name])
11932 missing.append(name)
11935 raise errors.OpPrereqError("Some groups do not exist: %s" %
11936 utils.CommaJoin(missing),
11937 errors.ECODE_NOENT)
11939 def DeclareLocks(self, lu, level):
11942 def _GetQueryData(self, lu):
11943 """Computes the list of node groups and their attributes.
11946 do_nodes = query.GQ_NODE in self.requested_data
11947 do_instances = query.GQ_INST in self.requested_data
11949 group_to_nodes = None
11950 group_to_instances = None
11952 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11953 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11954 # latter GetAllInstancesInfo() is not enough, for we have to go through
11955 # instance->node. Hence, we will need to process nodes even if we only need
11956 # instance information.
11957 if do_nodes or do_instances:
11958 all_nodes = lu.cfg.GetAllNodesInfo()
11959 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11962 for node in all_nodes.values():
11963 if node.group in group_to_nodes:
11964 group_to_nodes[node.group].append(node.name)
11965 node_to_group[node.name] = node.group
11968 all_instances = lu.cfg.GetAllInstancesInfo()
11969 group_to_instances = dict((uuid, []) for uuid in self.wanted)
11971 for instance in all_instances.values():
11972 node = instance.primary_node
11973 if node in node_to_group:
11974 group_to_instances[node_to_group[node]].append(instance.name)
11977 # Do not pass on node information if it was not requested.
11978 group_to_nodes = None
11980 return query.GroupQueryData([self._all_groups[uuid]
11981 for uuid in self.wanted],
11982 group_to_nodes, group_to_instances)
11985 class LUGroupQuery(NoHooksLU):
11986 """Logical unit for querying node groups.
11991 def CheckArguments(self):
11992 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
11993 self.op.output_fields, False)
11995 def ExpandNames(self):
11996 self.gq.ExpandNames(self)
11998 def Exec(self, feedback_fn):
11999 return self.gq.OldStyleQuery(self)
12002 class LUGroupSetParams(LogicalUnit):
12003 """Modifies the parameters of a node group.
12006 HPATH = "group-modify"
12007 HTYPE = constants.HTYPE_GROUP
12010 def CheckArguments(self):
12013 self.op.alloc_policy,
12016 if all_changes.count(None) == len(all_changes):
12017 raise errors.OpPrereqError("Please pass at least one modification",
12018 errors.ECODE_INVAL)
12020 def ExpandNames(self):
12021 # This raises errors.OpPrereqError on its own:
12022 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12024 self.needed_locks = {
12025 locking.LEVEL_NODEGROUP: [self.group_uuid],
12028 def CheckPrereq(self):
12029 """Check prerequisites.
12032 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12034 if self.group is None:
12035 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12036 (self.op.group_name, self.group_uuid))
12038 if self.op.ndparams:
12039 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12040 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12041 self.new_ndparams = new_ndparams
12043 def BuildHooksEnv(self):
12044 """Build hooks env.
12048 "GROUP_NAME": self.op.group_name,
12049 "NEW_ALLOC_POLICY": self.op.alloc_policy,
12052 def BuildHooksNodes(self):
12053 """Build hooks nodes.
12056 mn = self.cfg.GetMasterNode()
12057 return ([mn], [mn])
12059 def Exec(self, feedback_fn):
12060 """Modifies the node group.
12065 if self.op.ndparams:
12066 self.group.ndparams = self.new_ndparams
12067 result.append(("ndparams", str(self.group.ndparams)))
12069 if self.op.alloc_policy:
12070 self.group.alloc_policy = self.op.alloc_policy
12072 self.cfg.Update(self.group, feedback_fn)
12077 class LUGroupRemove(LogicalUnit):
12078 HPATH = "group-remove"
12079 HTYPE = constants.HTYPE_GROUP
12082 def ExpandNames(self):
12083 # This will raise errors.OpPrereqError on its own:
12084 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12085 self.needed_locks = {
12086 locking.LEVEL_NODEGROUP: [self.group_uuid],
12089 def CheckPrereq(self):
12090 """Check prerequisites.
12092 This checks that the given group name exists as a node group, that it is
12093 empty (i.e., contains no nodes), and that it is not the last group of the cluster.
12097 # Verify that the group is empty.
12098 group_nodes = [node.name
12099 for node in self.cfg.GetAllNodesInfo().values()
12100 if node.group == self.group_uuid]
12103 raise errors.OpPrereqError("Group '%s' not empty, has the following"
12105 (self.op.group_name,
12106 utils.CommaJoin(utils.NiceSort(group_nodes))),
12107 errors.ECODE_STATE)
12109 # Verify the cluster would not be left group-less.
12110 if len(self.cfg.GetNodeGroupList()) == 1:
12111 raise errors.OpPrereqError("Group '%s' is the only group,"
12112 " cannot be removed" %
12113 self.op.group_name,
12114 errors.ECODE_STATE)
12116 def BuildHooksEnv(self):
12117 """Build hooks env.
12121 "GROUP_NAME": self.op.group_name,
12124 def BuildHooksNodes(self):
12125 """Build hooks nodes.
12128 mn = self.cfg.GetMasterNode()
12129 return ([mn], [mn])
12131 def Exec(self, feedback_fn):
12132 """Remove the node group.
12136 self.cfg.RemoveNodeGroup(self.group_uuid)
12137 except errors.ConfigurationError:
12138 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12139 (self.op.group_name, self.group_uuid))
12141 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12144 class LUGroupRename(LogicalUnit):
12145 HPATH = "group-rename"
12146 HTYPE = constants.HTYPE_GROUP
12149 def ExpandNames(self):
12150 # This raises errors.OpPrereqError on its own:
12151 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12153 self.needed_locks = {
12154 locking.LEVEL_NODEGROUP: [self.group_uuid],
12157 def CheckPrereq(self):
12158 """Check prerequisites.
12160 Ensures requested new name is not yet used.
12164 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12165 except errors.OpPrereqError:
12168 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12169 " node group (UUID: %s)" %
12170 (self.op.new_name, new_name_uuid),
12171 errors.ECODE_EXISTS)
12173 def BuildHooksEnv(self):
12174 """Build hooks env.
12178 "OLD_NAME": self.op.group_name,
12179 "NEW_NAME": self.op.new_name,
12182 def BuildHooksNodes(self):
12183 """Build hooks nodes.
12186 mn = self.cfg.GetMasterNode()
12188 all_nodes = self.cfg.GetAllNodesInfo()
12189 all_nodes.pop(mn, None)
12192 run_nodes.extend(node.name for node in all_nodes.values()
12193 if node.group == self.group_uuid)
12195 return (run_nodes, run_nodes)
12197 def Exec(self, feedback_fn):
12198 """Rename the node group.
12201 group = self.cfg.GetNodeGroup(self.group_uuid)
12204 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12205 (self.op.group_name, self.group_uuid))
12207 group.name = self.op.new_name
12208 self.cfg.Update(group, feedback_fn)
12210 return self.op.new_name
12213 class LUGroupEvacuate(LogicalUnit):
12214 HPATH = "group-evacuate"
12215 HTYPE = constants.HTYPE_GROUP
12218 def ExpandNames(self):
12219 # This raises errors.OpPrereqError on its own:
12220 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12222 if self.op.target_groups:
12223 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12224 self.op.target_groups)
12226 self.req_target_uuids = []
12228 if self.group_uuid in self.req_target_uuids:
12229 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12230 " as a target group (targets are %s)" %
12232 utils.CommaJoin(self.req_target_uuids)),
12233 errors.ECODE_INVAL)
12235 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12237 self.share_locks = _ShareAll()
12238 self.needed_locks = {
12239 locking.LEVEL_INSTANCE: [],
12240 locking.LEVEL_NODEGROUP: [],
12241 locking.LEVEL_NODE: [],
12244 def DeclareLocks(self, level):
12245 if level == locking.LEVEL_INSTANCE:
12246 assert not self.needed_locks[locking.LEVEL_INSTANCE]
12248 # Lock instances optimistically, needs verification once node and group
12249 # locks have been acquired
12250 self.needed_locks[locking.LEVEL_INSTANCE] = \
12251 self.cfg.GetNodeGroupInstances(self.group_uuid)
12253 elif level == locking.LEVEL_NODEGROUP:
12254 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12256 if self.req_target_uuids:
12257 lock_groups = set([self.group_uuid] + self.req_target_uuids)
12259 # Lock all groups used by instances optimistically; this requires going
12260 # via the node before it's locked, requiring verification later on
12261 lock_groups.update(group_uuid
12262 for instance_name in
12263 self.owned_locks(locking.LEVEL_INSTANCE)
12265 self.cfg.GetInstanceNodeGroups(instance_name))
12267 # No target groups, need to lock all of them
12268 lock_groups = locking.ALL_SET
12270 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12272 elif level == locking.LEVEL_NODE:
12273 # This will only lock the nodes in the group to be evacuated which
12274 # contain actual instances
12275 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12276 self._LockInstancesNodes()
12278 # Lock all nodes in group to be evacuated and target groups
12279 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12280 assert self.group_uuid in owned_groups
12281 member_nodes = [node_name
12282 for group in owned_groups
12283 for node_name in self.cfg.GetNodeGroup(group).members]
12284 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12286 def CheckPrereq(self):
12287 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12288 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12289 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12291 assert owned_groups.issuperset(self.req_target_uuids)
12292 assert self.group_uuid in owned_groups
12294 # Check if locked instances are still correct
12295 wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
12296 if owned_instances != wanted_instances:
12297 raise errors.OpPrereqError("Instances in node group to be evacuated (%s)"
12298 " changed since locks were acquired, wanted"
12299 " %s, have %s; retry the operation" %
12301 utils.CommaJoin(wanted_instances),
12302 utils.CommaJoin(owned_instances)),
12303 errors.ECODE_STATE)
12305 # Get instance information
12306 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12308 # Check if node groups for locked instances are still correct
12309 for instance_name in owned_instances:
12310 inst = self.instances[instance_name]
12311 assert owned_nodes.issuperset(inst.all_nodes), \
12312 "Instance %s's nodes changed while we kept the lock" % instance_name
12314 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12317 assert self.group_uuid in inst_groups, \
12318 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12320 if self.req_target_uuids:
12321 # User requested specific target groups
12322 self.target_uuids = self.req_target_uuids
12324 # All groups except the one to be evacuated are potential targets
12325 self.target_uuids = [group_uuid for group_uuid in owned_groups
12326 if group_uuid != self.group_uuid]
12328 if not self.target_uuids:
12329 raise errors.OpPrereqError("There are no possible target groups",
12330 errors.ECODE_INVAL)
12332 def BuildHooksEnv(self):
12333 """Build hooks env.
12337 "GROUP_NAME": self.op.group_name,
12338 "TARGET_GROUPS": " ".join(self.target_uuids),
12341 def BuildHooksNodes(self):
12342 """Build hooks nodes.
12345 mn = self.cfg.GetMasterNode()
12347 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12349 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12351 return (run_nodes, run_nodes)
12353 def Exec(self, feedback_fn):
12354 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12356 assert self.group_uuid not in self.target_uuids
12358 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12359 instances=instances, target_groups=self.target_uuids)
12361 ial.Run(self.op.iallocator)
12363 if not ial.success:
12364 raise errors.OpPrereqError("Can't compute group evacuation using"
12365 " iallocator '%s': %s" %
12366 (self.op.iallocator, ial.info),
12367 errors.ECODE_NORES)
12369 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12371 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12372 len(jobs), self.op.group_name)
12374 return ResultWithJobs(jobs)
12377 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
12378 """Generic tags LU.
12380 This is an abstract class which is the parent of all the other tags LUs.
12383 def ExpandNames(self):
12384 self.group_uuid = None
12385 self.needed_locks = {}
12386 if self.op.kind == constants.TAG_NODE:
12387 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12388 self.needed_locks[locking.LEVEL_NODE] = self.op.name
12389 elif self.op.kind == constants.TAG_INSTANCE:
12390 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12391 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12392 elif self.op.kind == constants.TAG_NODEGROUP:
12393 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12395 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12396 # not possible to acquire the BGL based on opcode parameters)
12398 def CheckPrereq(self):
12399 """Check prerequisites.
12402 if self.op.kind == constants.TAG_CLUSTER:
12403 self.target = self.cfg.GetClusterInfo()
12404 elif self.op.kind == constants.TAG_NODE:
12405 self.target = self.cfg.GetNodeInfo(self.op.name)
12406 elif self.op.kind == constants.TAG_INSTANCE:
12407 self.target = self.cfg.GetInstanceInfo(self.op.name)
12408 elif self.op.kind == constants.TAG_NODEGROUP:
12409 self.target = self.cfg.GetNodeGroup(self.group_uuid)
12411 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12412 str(self.op.kind), errors.ECODE_INVAL)
12415 class LUTagsGet(TagsLU):
12416 """Returns the tags of a given object.
12421 def ExpandNames(self):
12422 TagsLU.ExpandNames(self)
12424 # Share locks as this is only a read operation
12425 self.share_locks = _ShareAll()
12427 def Exec(self, feedback_fn):
12428 """Returns the tag list.
12431 return list(self.target.GetTags())
12434 class LUTagsSearch(NoHooksLU):
12435 """Searches the tags for a given pattern.
12440 def ExpandNames(self):
12441 self.needed_locks = {}
12443 def CheckPrereq(self):
12444 """Check prerequisites.
12446 This checks the pattern passed for validity by compiling it.
12450 self.re = re.compile(self.op.pattern)
12451 except re.error, err:
12452 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12453 (self.op.pattern, err), errors.ECODE_INVAL)
12455 def Exec(self, feedback_fn):
12456 """Returns the tag list.
12460 tgts = [("/cluster", cfg.GetClusterInfo())]
12461 ilist = cfg.GetAllInstancesInfo().values()
12462 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12463 nlist = cfg.GetAllNodesInfo().values()
12464 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12465 tgts.extend(("/nodegroup/%s" % n.name, n)
12466 for n in cfg.GetAllNodeGroupsInfo().values())
12468 for path, target in tgts:
12469 for tag in target.GetTags():
12470 if self.re.search(tag):
12471 results.append((path, tag))
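# Illustrative sketch (hypothetical tags) of the (path, tag) pairs collected
# above: every tag of every object is matched against the compiled pattern.
import re

_example_targets = [("/cluster", ["production"]),
                    ("/instances/web1.example.com", ["prod-web", "lvm"])]
_example_re = re.compile("^prod")
_example_matches = [(path, tag)
                    for (path, tags) in _example_targets
                    for tag in tags if _example_re.search(tag)]
assert _example_matches == [("/cluster", "production"),
                            ("/instances/web1.example.com", "prod-web")]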
12475 class LUTagsSet(TagsLU):
12476 """Sets a tag on a given object.
12481 def CheckPrereq(self):
12482 """Check prerequisites.
12484 This checks the type and length of the tag name and value.
12487 TagsLU.CheckPrereq(self)
12488 for tag in self.op.tags:
12489 objects.TaggableObject.ValidateTag(tag)
12491 def Exec(self, feedback_fn):
12496 for tag in self.op.tags:
12497 self.target.AddTag(tag)
12498 except errors.TagError, err:
12499 raise errors.OpExecError("Error while setting tag: %s" % str(err))
12500 self.cfg.Update(self.target, feedback_fn)
12503 class LUTagsDel(TagsLU):
12504 """Delete a list of tags from a given object.
12509 def CheckPrereq(self):
12510 """Check prerequisites.
12512 This checks that we have the given tag.
12515 TagsLU.CheckPrereq(self)
12516 for tag in self.op.tags:
12517 objects.TaggableObject.ValidateTag(tag)
12518 del_tags = frozenset(self.op.tags)
12519 cur_tags = self.target.GetTags()
12521 diff_tags = del_tags - cur_tags
12523 diff_names = ("'%s'" % i for i in sorted(diff_tags))
12524 raise errors.OpPrereqError("Tag(s) %s not found" %
12525 (utils.CommaJoin(diff_names), ),
12526 errors.ECODE_NOENT)
12528 def Exec(self, feedback_fn):
12529 """Remove the tag from the object.
12532 for tag in self.op.tags:
12533 self.target.RemoveTag(tag)
12534 self.cfg.Update(self.target, feedback_fn)
12537 class LUTestDelay(NoHooksLU):
12538 """Sleep for a specified amount of time.
12540 This LU sleeps on the master and/or nodes for a specified amount of time.
12546 def ExpandNames(self):
12547 """Expand names and set required locks.
12549 This expands the node list, if any.
12552 self.needed_locks = {}
12553 if self.op.on_nodes:
12554 # _GetWantedNodes can be used here, but is not always appropriate to use
12555 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12556 # more information.
12557 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12558 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12560 def _TestDelay(self):
12561 """Do the actual sleep.
12564 if self.op.on_master:
12565 if not utils.TestDelay(self.op.duration):
12566 raise errors.OpExecError("Error during master delay test")
12567 if self.op.on_nodes:
12568 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12569 for node, node_result in result.items():
12570 node_result.Raise("Failure during rpc call to node %s" % node)
12572 def Exec(self, feedback_fn):
12573 """Execute the test delay opcode, with the wanted repetitions.
12576 if self.op.repeat == 0:
12579 top_value = self.op.repeat - 1
12580 for i in range(self.op.repeat):
12581 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12585 class LUTestJqueue(NoHooksLU):
12586 """Utility LU to test some aspects of the job queue.
12591 # Must be lower than default timeout for WaitForJobChange to see whether it
12592 # notices changed jobs
12593 _CLIENT_CONNECT_TIMEOUT = 20.0
12594 _CLIENT_CONFIRM_TIMEOUT = 60.0
12597 def _NotifyUsingSocket(cls, cb, errcls):
12598 """Opens a Unix socket and waits for another program to connect.
12601 @param cb: Callback to send socket name to client
12602 @type errcls: class
12603 @param errcls: Exception class to use for errors
12606 # Using a temporary directory as there's no easy way to create temporary
12607 # sockets without writing a custom loop around tempfile.mktemp and socket.bind
12609 tmpdir = tempfile.mkdtemp()
12611 tmpsock = utils.PathJoin(tmpdir, "sock")
12613 logging.debug("Creating temporary socket at %s", tmpsock)
12614 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12619 # Send details to client
12622 # Wait for client to connect before continuing
12623 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12625 (conn, _) = sock.accept()
12626 except socket.error, err:
12627 raise errcls("Client didn't connect in time (%s)" % err)
12631 # Remove as soon as client is connected
12632 shutil.rmtree(tmpdir)
12634 # Wait for client to close
12637 # pylint: disable-msg=E1101
12638 # Instance of '_socketobject' has no ... member
12639 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12641 except socket.error, err:
12642 raise errcls("Client failed to confirm notification (%s)" % err)
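# Sketch of the peer side of this handshake (an assumption about what the
# test client does, not code from this module): the client connects to the
# advertised socket path, which unblocks sock.accept() above, and closes the
# connection once it has processed the notification, ending the confirmation
# wait.
import socket

def _ExampleAcknowledgeNotification(sockname):
  client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  client.connect(sockname)
  client.close()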
12646 def _SendNotification(self, test, arg, sockname):
12647 """Sends a notification to the client.
12650 @param test: Test name
12651 @param arg: Test argument (depends on test)
12652 @type sockname: string
12653 @param sockname: Socket path
12656 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12658 def _Notify(self, prereq, test, arg):
12659 """Notifies the client of a test.
12662 @param prereq: Whether this is a prereq-phase test
12664 @param test: Test name
12665 @param arg: Test argument (depends on test)
12669 errcls = errors.OpPrereqError
12671 errcls = errors.OpExecError
12673 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12677 def CheckArguments(self):
12678 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12679 self.expandnames_calls = 0
12681 def ExpandNames(self):
12682 checkargs_calls = getattr(self, "checkargs_calls", 0)
12683 if checkargs_calls < 1:
12684 raise errors.ProgrammerError("CheckArguments was not called")
12686 self.expandnames_calls += 1
12688 if self.op.notify_waitlock:
12689 self._Notify(True, constants.JQT_EXPANDNAMES, None)
12691 self.LogInfo("Expanding names")
12693 # Get lock on master node (just to get a lock, not for a particular reason)
12694 self.needed_locks = {
12695 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12698 def Exec(self, feedback_fn):
12699 if self.expandnames_calls < 1:
12700 raise errors.ProgrammerError("ExpandNames was not called")
12702 if self.op.notify_exec:
12703 self._Notify(False, constants.JQT_EXEC, None)
12705 self.LogInfo("Executing")
12707 if self.op.log_messages:
12708 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12709 for idx, msg in enumerate(self.op.log_messages):
12710 self.LogInfo("Sending log message %s", idx + 1)
12711 feedback_fn(constants.JQT_MSGPREFIX + msg)
12712 # Report how many test messages have been sent
12713 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12716 raise errors.OpExecError("Opcode failure was requested")
12721 class IAllocator(object):
12722 """IAllocator framework.
12724 An IAllocator instance has four sets of attributes:
12725 - cfg that is needed to query the cluster
12726 - input data (all members of the _KEYS class attribute are required)
12727 - four buffer attributes (in|out_data|text), that represent the
12728 input (to the external script) in text and data structure format,
12729 and the output from it, again in two formats
12730 - the result variables from the script (success, info, nodes) for
12734 # pylint: disable-msg=R0902
12735 # lots of instance attributes
12737 def __init__(self, cfg, rpc, mode, **kwargs):
12740 # init buffer variables
12741 self.in_text = self.out_text = self.in_data = self.out_data = None
12742 # init all input fields so that pylint is happy
12744 self.memory = self.disks = self.disk_template = None
12745 self.os = self.tags = self.nics = self.vcpus = None
12746 self.hypervisor = None
12747 self.relocate_from = None
12749 self.evac_nodes = None
12750 self.instances = None
12751 self.evac_mode = None
12752 self.target_groups = []
12754 self.required_nodes = None
12755 # init result fields
12756 self.success = self.info = self.result = None
12759 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12761 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12762 " IAllocator" % self.mode)
12764 keyset = [n for (n, _) in keydata]
12767 if key not in keyset:
12768 raise errors.ProgrammerError("Invalid input parameter '%s' to"
12769 " IAllocator" % key)
12770 setattr(self, key, kwargs[key])
12773 if key not in kwargs:
12774 raise errors.ProgrammerError("Missing input parameter '%s' to"
12775 " IAllocator" % key)
12776 self._BuildInputData(compat.partial(fn, self), keydata)
12778 def _ComputeClusterData(self):
12779 """Compute the generic allocator input data.
12781 This is the data that is independent of the actual operation.
12785 cluster_info = cfg.GetClusterInfo()
12788 "version": constants.IALLOCATOR_VERSION,
12789 "cluster_name": cfg.GetClusterName(),
12790 "cluster_tags": list(cluster_info.GetTags()),
12791 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12792 # we don't have job IDs
12794 ninfo = cfg.GetAllNodesInfo()
12795 iinfo = cfg.GetAllInstancesInfo().values()
12796 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12799 node_list = [n.name for n in ninfo.values() if n.vm_capable]
12801 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12802 hypervisor_name = self.hypervisor
12803 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12804 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12806 hypervisor_name = cluster_info.enabled_hypervisors[0]
12808 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12811 self.rpc.call_all_instances_info(node_list,
12812 cluster_info.enabled_hypervisors)
12814 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12816 config_ndata = self._ComputeBasicNodeData(ninfo)
12817 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12818 i_list, config_ndata)
12819 assert len(data["nodes"]) == len(ninfo), \
12820 "Incomplete node data computed"
12822 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12824 self.in_data = data
12827 def _ComputeNodeGroupData(cfg):
12828 """Compute node groups data.
12831 ng = dict((guuid, {
12832 "name": gdata.name,
12833 "alloc_policy": gdata.alloc_policy,
12835 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
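# Illustrative sketch (invented UUID and values, other keys omitted) of the
# mapping built above and later attached as data["nodegroups"]:
_EXAMPLE_NODEGROUPS = {
  "uuid-group-1": {
    "name": "default",
    "alloc_policy": "preferred",
  },
}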
12840 def _ComputeBasicNodeData(node_cfg):
12841 """Compute global node data.
12844 @returns: a dict of name: (node dict, node config)
12847 # fill in static (config-based) values
12848 node_results = dict((ninfo.name, {
12849 "tags": list(ninfo.GetTags()),
12850 "primary_ip": ninfo.primary_ip,
12851 "secondary_ip": ninfo.secondary_ip,
12852 "offline": ninfo.offline,
12853 "drained": ninfo.drained,
12854 "master_candidate": ninfo.master_candidate,
12855 "group": ninfo.group,
12856 "master_capable": ninfo.master_capable,
12857 "vm_capable": ninfo.vm_capable,
12859 for ninfo in node_cfg.values())
12861 return node_results
12864 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
12866 """Compute global node data.
12868 @param node_results: the basic node structures as filled from the config
12871 # make a copy of the current dict
12872 node_results = dict(node_results)
12873 for nname, nresult in node_data.items():
12874 assert nname in node_results, "Missing basic data for node %s" % nname
12875 ninfo = node_cfg[nname]
12877 if not (ninfo.offline or ninfo.drained):
12878 nresult.Raise("Can't get data for node %s" % nname)
12879 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
12881 remote_info = nresult.payload
12883 for attr in ["memory_total", "memory_free", "memory_dom0",
12884 "vg_size", "vg_free", "cpu_total"]:
12885 if attr not in remote_info:
12886 raise errors.OpExecError("Node '%s' didn't return attribute"
12887 " '%s'" % (nname, attr))
12888 if not isinstance(remote_info[attr], int):
12889 raise errors.OpExecError("Node '%s' returned invalid value"
12891 (nname, attr, remote_info[attr]))
12892 # compute memory used by primary instances
12893 i_p_mem = i_p_up_mem = 0
12894 for iinfo, beinfo in i_list:
12895 if iinfo.primary_node == nname:
12896 i_p_mem += beinfo[constants.BE_MEMORY]
12897 if iinfo.name not in node_iinfo[nname].payload:
12900 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
12901 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
12902 remote_info["memory_free"] -= max(0, i_mem_diff)
12905 i_p_up_mem += beinfo[constants.BE_MEMORY]
12907 # compute memory used by instances
12909 "total_memory": remote_info["memory_total"],
12910 "reserved_memory": remote_info["memory_dom0"],
12911 "free_memory": remote_info["memory_free"],
12912 "total_disk": remote_info["vg_size"],
12913 "free_disk": remote_info["vg_free"],
12914 "total_cpus": remote_info["cpu_total"],
12915 "i_pri_memory": i_p_mem,
12916 "i_pri_up_memory": i_p_up_mem,
12918 pnr_dyn.update(node_results[nname])
12919 node_results[nname] = pnr_dyn
12921 return node_results
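# Worked example of the free-memory correction above (invented numbers): an
# instance configured with 512 MiB (BE_MEMORY) that the hypervisor reports
# as currently using only 384 MiB has the unused 128 MiB subtracted from the
# node's free memory, so the allocator still treats that memory as reserved.
_example_be_memory = 512
_example_used = 384
_example_memory_free = 2048
_example_memory_free -= max(0, _example_be_memory - _example_used)
assert _example_memory_free == 1920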
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data
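
  # Illustrative sketch only: a single entry of the returned instance dict,
  # with invented values, showing the per-instance data handed to the
  # allocator (memory and disk sizes in MiB):
  #
  #   "web1.example.com": {
  #     "tags": ["service:web"],
  #     "admin_up": True,
  #     "vcpus": 2,
  #     "memory": 2048,
  #     "os": "debian-image",
  #     "nodes": ["node1.example.com", "node2.example.com"],
  #     "nics": [{"mac": "aa:00:00:12:34:56", "ip": None,
  #               "mode": "bridged", "link": "xen-br0", "bridge": "xen-br0"}],
  #     "disks": [{"size": 10240, "mode": "rw"}],
  #     "disk_template": "drbd",
  #     "hypervisor": "xen-pvm",
  #     "disk_space_total": 10368,
  #   }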
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _AllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }
    return request
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _IAllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    return {
      "evac_nodes": self.evac_nodes,
      }

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for group change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }
  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
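
  # Illustrative sketch only: after _BuildInputData the serialized input
  # handed to the allocator script is a JSON document of roughly this shape
  # (cluster-wide data heavily abbreviated, key names and values invented
  # for illustration):
  #
  #   {
  #     "cluster_tags": [...],
  #     "nodegroups": {...},
  #     "nodes": {...},           # as built by the node data methods above
  #     "instances": {...},       # as built by _ComputeInstanceData above
  #     "request": {
  #       "type": "allocate",     # the mode, added above
  #       "name": "web1.example.com",
  #       # ...mode-specific keys, validated against keydata...
  #     },
  #   }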
  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable-msg=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                  ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                  ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance, [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_MEVAC:
      (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
       ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
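
  # Minimal usage sketch (not executed here): a logical unit typically drives
  # the allocator roughly as follows; "hail" is only an example allocator
  # script name and the keyword arguments depend on the chosen mode.
  #
  #   ial = IAllocator(self.cfg, self.rpc,
  #                    mode=constants.IALLOCATOR_MODE_RELOC,
  #                    name=instance_name,
  #                    relocate_from=list(secondary_nodes))
  #   ial.Run("hail")
  #   if not ial.success:
  #     raise errors.OpPrereqError("Can't compute nodes using iallocator:"
  #                                " %s" % ial.info, errors.ECODE_NORES)
  #   new_nodes = ial.result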
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode in (constants.IALLOCATOR_MODE_RELOC,
                     constants.IALLOCATOR_MODE_MEVAC):
      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      if self.mode == constants.IALLOCATOR_MODE_RELOC:
        assert self.relocate_from is not None
        assert self.required_nodes == 1

        request_groups = fn(self.relocate_from)
        result_groups = fn(rdict["result"])

        if result_groups != request_groups:
          raise errors.OpExecError("Groups of nodes returned by iallocator"
                                   " (%s) differ from original groups (%s)" %
                                   (utils.CommaJoin(result_groups),
                                    utils.CommaJoin(request_groups)))
      elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
        request_groups = fn(self.evac_nodes)
        for (instance_name, secnode) in self.result:
          result_groups = fn([secnode])
          if result_groups != request_groups:
            raise errors.OpExecError("Iallocator returned new secondary node"
                                     " '%s' (group '%s') for instance '%s'"
                                     " which is not in original group '%s'" %
                                     (secnode, utils.CommaJoin(result_groups),
                                      instance_name,
                                      utils.CommaJoin(request_groups)))
      else:
        raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
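
  # Illustrative sketch only: a well-formed allocator reply, as validated
  # above, is a JSON object with at least these three keys (values invented;
  # the exact shape of "result" depends on the mode's result check):
  #
  #   {
  #     "success": true,
  #     "info": "allocation successful",
  #     "result": ["node1.example.com", "node2.example.com"]
  #   }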
  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @param groups: Group information
    @param nodes: Node names

    """
    result = set()
    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        continue
      try:
        group = groups[group_uuid]
      except KeyError:
        # Can't find group, let's use UUID
        group_name = group_uuid
      else:
        group_name = group["name"]
      result.add(group_name)

    return sorted(result)
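
  # Minimal usage sketch (invented data): unknown nodes are skipped and
  # missing groups fall back to their UUID.
  #
  #   node2group = {"node1": "uuid-a", "node2": "uuid-b", "node3": "uuid-c"}
  #   groups = {"uuid-a": {"name": "default"}, "uuid-b": {"name": "ssd"}}
  #   IAllocator._NodesToGroups(node2group, groups,
  #                             ["node1", "node2", "node3", "ghost"])
  #   # => ["default", "ssd", "uuid-c"]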
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)
  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
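
# Minimal usage sketch (not executed here): look up the query implementation
# class for a resource type; unknown names raise OpPrereqError.
#
#   impl_cls = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
#   _GetQueryImplementation("bogus")   # raises OpPrereqError (ECODE_INVAL)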