code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay to many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43 import operator
  44
  45 from ganeti import ssh
  46 from ganeti import utils
  47 from ganeti import errors
  48 from ganeti import hypervisor
  49 from ganeti import locking
  50 from ganeti import constants
  51 from ganeti import objects
  52 from ganeti import serializer
  53 from ganeti import ssconf
  54 from ganeti import uidpool
  55 from ganeti import compat
  56 from ganeti import masterd
  57 from ganeti import netutils
  58 from ganeti import query
  59 from ganeti import qlang
  60 from ganeti import opcodes
  61 from ganeti import ht
  62
  63 import ganeti.masterd.instance # pylint: disable=W0611
  64
  65
  66 class ResultWithJobs:
  67   """Data container for LU results with jobs.
  68
  69   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  70   by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  71   contained in the C{jobs} attribute and include the job IDs in the opcode
  72   result.
  73
  74   """
  75   def __init__(self, jobs, **kwargs):
  76     """Initializes this class.
  77
  78     Additional return values can be specified as keyword arguments.
  79
  80     @type jobs: list of lists of L{opcode.OpCode}
  81     @param jobs: A list of lists of opcode objects
  82
  83     """
  84     self.jobs = jobs
  85     self.other = kwargs
  86
  87
  88 class LogicalUnit(object):
  89   """Logical Unit base class.
  90
  91   Subclasses must follow these rules:
  92     - implement ExpandNames
  93     - implement CheckPrereq (except when tasklets are used)
  94     - implement Exec (except when tasklets are used)
  95     - implement BuildHooksEnv
  96     - implement BuildHooksNodes
  97     - redefine HPATH and HTYPE
  98     - optionally redefine their run requirements:
  99         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 100
 101   Note that all commands require root permissions.
 102
 103   @ivar dry_run_result: the value (if any) that will be returned to the caller
 104       in dry-run mode (signalled by opcode dry_run parameter)
 105
 106   """
 107   HPATH = None
 108   HTYPE = None
 109   REQ_BGL = True
 110
 111   def __init__(self, processor, op, context, rpc):
 112     """Constructor for LogicalUnit.
 113
 114     This needs to be overridden in derived classes in order to check op
 115     validity.
 116
 117     """
 118     self.proc = processor
 119     self.op = op
 120     self.cfg = context.cfg
 121     self.glm = context.glm
 122     # readability alias
 123     self.owned_locks = context.glm.list_owned
 124     self.context = context
 125     self.rpc = rpc
 126     # Dicts used to declare locking needs to mcpu
 127     self.needed_locks = None
 128     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 129     self.add_locks = {}
 130     self.remove_locks = {}
 131     # Used to force good behavior when calling helper functions
 132     self.recalculate_locks = {}
 133     # logging
 134     self.Log = processor.Log # pylint: disable=C0103
 135     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 136     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 137     self.LogStep = processor.LogStep # pylint: disable=C0103
 138     # support for dry-run
 139     self.dry_run_result = None
 140     # support for generic debug attribute
 141     if (not hasattr(self.op, "debug_level") or
 142         not isinstance(self.op.debug_level, int)):
 143       self.op.debug_level = 0
 144
 145     # Tasklets
 146     self.tasklets = None
 147
 148     # Validate opcode parameters and set defaults
 149     self.op.Validate(True)
 150
 151     self.CheckArguments()
 152
 153   def CheckArguments(self):
 154     """Check syntactic validity for the opcode arguments.
 155
 156     This method is for doing a simple syntactic check and ensure
 157     validity of opcode parameters, without any cluster-related
 158     checks. While the same can be accomplished in ExpandNames and/or
 159     CheckPrereq, doing these separate is better because:
 160
 161       - ExpandNames is left as as purely a lock-related function
 162       - CheckPrereq is run after we have acquired locks (and possible
 163         waited for them)
 164
 165     The function is allowed to change the self.op attribute so that
 166     later methods can no longer worry about missing parameters.
 167
 168     """
 169     pass
 170
 171   def ExpandNames(self):
 172     """Expand names for this LU.
 173
 174     This method is called before starting to execute the opcode, and it should
 175     update all the parameters of the opcode to their canonical form (e.g. a
 176     short node name must be fully expanded after this method has successfully
 177     completed). This way locking, hooks, logging, etc. can work correctly.
 178
 179     LUs which implement this method must also populate the self.needed_locks
 180     member, as a dict with lock levels as keys, and a list of needed lock names
 181     as values. Rules:
 182
 183       - use an empty dict if you don't need any lock
 184       - if you don't need any lock at a particular level omit that level
 185       - don't put anything for the BGL level
 186       - if you want all locks at a level use locking.ALL_SET as a value
 187
 188     If you need to share locks (rather than acquire them exclusively) at one
 189     level you can modify self.share_locks, setting a true value (usually 1) for
 190     that level. By default locks are not shared.
 191
 192     This function can also define a list of tasklets, which then will be
 193     executed in order instead of the usual LU-level CheckPrereq and Exec
 194     functions, if those are not defined by the LU.
 195
 196     Examples::
 197
 198       # Acquire all nodes and one instance
 199       self.needed_locks = {
 200         locking.LEVEL_NODE: locking.ALL_SET,
 201         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 202       }
 203       # Acquire just two nodes
 204       self.needed_locks = {
 205         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 206       }
 207       # Acquire no locks
 208       self.needed_locks = {} # No, you can't leave it to the default value None
 209
 210     """
 211     # The implementation of this method is mandatory only if the new LU is
 212     # concurrent, so that old LUs don't need to be changed all at the same
 213     # time.
 214     if self.REQ_BGL:
 215       self.needed_locks = {} # Exclusive LUs don't need locks.
 216     else:
 217       raise NotImplementedError
 218
 219   def DeclareLocks(self, level):
 220     """Declare LU locking needs for a level
 221
 222     While most LUs can just declare their locking needs at ExpandNames time,
 223     sometimes there's the need to calculate some locks after having acquired
 224     the ones before. This function is called just before acquiring locks at a
 225     particular level, but after acquiring the ones at lower levels, and permits
 226     such calculations. It can be used to modify self.needed_locks, and by
 227     default it does nothing.
 228
 229     This function is only called if you have something already set in
 230     self.needed_locks for the level.
 231
 232     @param level: Locking level which is going to be locked
 233     @type level: member of ganeti.locking.LEVELS
 234
 235     """
 236
 237   def CheckPrereq(self):
 238     """Check prerequisites for this LU.
 239
 240     This method should check that the prerequisites for the execution
 241     of this LU are fulfilled. It can do internode communication, but
 242     it should be idempotent - no cluster or system changes are
 243     allowed.
 244
 245     The method should raise errors.OpPrereqError in case something is
 246     not fulfilled. Its return value is ignored.
 247
 248     This method should also update all the parameters of the opcode to
 249     their canonical form if it hasn't been done by ExpandNames before.
 250
 251     """
 252     if self.tasklets is not None:
 253       for (idx, tl) in enumerate(self.tasklets):
 254         logging.debug("Checking prerequisites for tasklet %s/%s",
 255                       idx + 1, len(self.tasklets))
 256         tl.CheckPrereq()
 257     else:
 258       pass
 259
 260   def Exec(self, feedback_fn):
 261     """Execute the LU.
 262
 263     This method should implement the actual work. It should raise
 264     errors.OpExecError for failures that are somewhat dealt with in
 265     code, or expected.
 266
 267     """
 268     if self.tasklets is not None:
 269       for (idx, tl) in enumerate(self.tasklets):
 270         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 271         tl.Exec(feedback_fn)
 272     else:
 273       raise NotImplementedError
 274
 275   def BuildHooksEnv(self):
 276     """Build hooks environment for this LU.
 277
 278     @rtype: dict
 279     @return: Dictionary containing the environment that will be used for
 280       running the hooks for this LU. The keys of the dict must not be prefixed
 281       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 282       will extend the environment with additional variables. If no environment
 283       should be defined, an empty dictionary should be returned (not C{None}).
 284     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 285       will not be called.
 286
 287     """
 288     raise NotImplementedError
 289
 290   def BuildHooksNodes(self):
 291     """Build list of nodes to run LU's hooks.
 292
 293     @rtype: tuple; (list, list)
 294     @return: Tuple containing a list of node names on which the hook
 295       should run before the execution and a list of node names on which the
 296       hook should run after the execution. No nodes should be returned as an
 297       empty list (and not None).
 298     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 299       will not be called.
 300
 301     """
 302     raise NotImplementedError
 303
 304   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 305     """Notify the LU about the results of its hooks.
 306
 307     This method is called every time a hooks phase is executed, and notifies
 308     the Logical Unit about the hooks' result. The LU can then use it to alter
 309     its result based on the hooks.  By default the method does nothing and the
 310     previous result is passed back unchanged but any LU can define it if it
 311     wants to use the local cluster hook-scripts somehow.
 312
 313     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 314         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 315     @param hook_results: the results of the multi-node hooks rpc call
 316     @param feedback_fn: function used send feedback back to the caller
 317     @param lu_result: the previous Exec result this LU had, or None
 318         in the PRE phase
 319     @return: the new Exec result, based on the previous result
 320         and hook results
 321
 322     """
 323     # API must be kept, thus we ignore the unused argument and could
 324     # be a function warnings
 325     # pylint: disable=W0613,R0201
 326     return lu_result
 327
 328   def _ExpandAndLockInstance(self):
 329     """Helper function to expand and lock an instance.
 330
 331     Many LUs that work on an instance take its name in self.op.instance_name
 332     and need to expand it and then declare the expanded name for locking. This
 333     function does it, and then updates self.op.instance_name to the expanded
 334     name. It also initializes needed_locks as a dict, if this hasn't been done
 335     before.
 336
 337     """
 338     if self.needed_locks is None:
 339       self.needed_locks = {}
 340     else:
 341       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 342         "_ExpandAndLockInstance called with instance-level locks set"
 343     self.op.instance_name = _ExpandInstanceName(self.cfg,
 344                                                 self.op.instance_name)
 345     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 346
 347   def _LockInstancesNodes(self, primary_only=False):
 348     """Helper function to declare instances' nodes for locking.
 349
 350     This function should be called after locking one or more instances to lock
 351     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 352     with all primary or secondary nodes for instances already locked and
 353     present in self.needed_locks[locking.LEVEL_INSTANCE].
 354
 355     It should be called from DeclareLocks, and for safety only works if
 356     self.recalculate_locks[locking.LEVEL_NODE] is set.
 357
 358     In the future it may grow parameters to just lock some instance's nodes, or
 359     to just lock primaries or secondary nodes, if needed.
 360
 361     If should be called in DeclareLocks in a way similar to::
 362
 363       if level == locking.LEVEL_NODE:
 364         self._LockInstancesNodes()
 365
 366     @type primary_only: boolean
 367     @param primary_only: only lock primary nodes of locked instances
 368
 369     """
 370     assert locking.LEVEL_NODE in self.recalculate_locks, \
 371       "_LockInstancesNodes helper function called with no nodes to recalculate"
 372
 373     # TODO: check if we're really been called with the instance locks held
 374
 375     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 376     # future we might want to have different behaviors depending on the value
 377     # of self.recalculate_locks[locking.LEVEL_NODE]
 378     wanted_nodes = []
 379     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 380     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 381       wanted_nodes.append(instance.primary_node)
 382       if not primary_only:
 383         wanted_nodes.extend(instance.secondary_nodes)
 384
 385     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 386       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 387     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 388       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 389
 390     del self.recalculate_locks[locking.LEVEL_NODE]
 391
 392
 393 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 394   """Simple LU which runs no hooks.
 395
 396   This LU is intended as a parent for other LogicalUnits which will
 397   run no hooks, in order to reduce duplicate code.
 398
 399   """
 400   HPATH = None
 401   HTYPE = None
 402
 403   def BuildHooksEnv(self):
 404     """Empty BuildHooksEnv for NoHooksLu.
 405
 406     This just raises an error.
 407
 408     """
 409     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 410
 411   def BuildHooksNodes(self):
 412     """Empty BuildHooksNodes for NoHooksLU.
 413
 414     """
 415     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 416
 417
 418 class Tasklet:
 419   """Tasklet base class.
 420
 421   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 422   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 423   tasklets know nothing about locks.
 424
 425   Subclasses must follow these rules:
 426     - Implement CheckPrereq
 427     - Implement Exec
 428
 429   """
 430   def __init__(self, lu):
 431     self.lu = lu
 432
 433     # Shortcuts
 434     self.cfg = lu.cfg
 435     self.rpc = lu.rpc
 436
 437   def CheckPrereq(self):
 438     """Check prerequisites for this tasklets.
 439
 440     This method should check whether the prerequisites for the execution of
 441     this tasklet are fulfilled. It can do internode communication, but it
 442     should be idempotent - no cluster or system changes are allowed.
 443
 444     The method should raise errors.OpPrereqError in case something is not
 445     fulfilled. Its return value is ignored.
 446
 447     This method should also update all parameters to their canonical form if it
 448     hasn't been done before.
 449
 450     """
 451     pass
 452
 453   def Exec(self, feedback_fn):
 454     """Execute the tasklet.
 455
 456     This method should implement the actual work. It should raise
 457     errors.OpExecError for failures that are somewhat dealt with in code, or
 458     expected.
 459
 460     """
 461     raise NotImplementedError
 462
 463
 464 class _QueryBase:
 465   """Base for query utility classes.
 466
 467   """
 468   #: Attribute holding field definitions
 469   FIELDS = None
 470
 471   def __init__(self, filter_, fields, use_locking):
 472     """Initializes this class.
 473
 474     """
 475     self.use_locking = use_locking
 476
 477     self.query = query.Query(self.FIELDS, fields, filter_=filter_,
 478                              namefield="name")
 479     self.requested_data = self.query.RequestedData()
 480     self.names = self.query.RequestedNames()
 481
 482     # Sort only if no names were requested
 483     self.sort_by_name = not self.names
 484
 485     self.do_locking = None
 486     self.wanted = None
 487
 488   def _GetNames(self, lu, all_names, lock_level):
 489     """Helper function to determine names asked for in the query.
 490
 491     """
 492     if self.do_locking:
 493       names = lu.owned_locks(lock_level)
 494     else:
 495       names = all_names
 496
 497     if self.wanted == locking.ALL_SET:
 498       assert not self.names
 499       # caller didn't specify names, so ordering is not important
 500       return utils.NiceSort(names)
 501
 502     # caller specified names and we must keep the same order
 503     assert self.names
 504     assert not self.do_locking or lu.glm.is_owned(lock_level)
 505
 506     missing = set(self.wanted).difference(names)
 507     if missing:
 508       raise errors.OpExecError("Some items were removed before retrieving"
 509                                " their data: %s" % missing)
 510
 511     # Return expanded names
 512     return self.wanted
 513
 514   def ExpandNames(self, lu):
 515     """Expand names for this query.
 516
 517     See L{LogicalUnit.ExpandNames}.
 518
 519     """
 520     raise NotImplementedError()
 521
 522   def DeclareLocks(self, lu, level):
 523     """Declare locks for this query.
 524
 525     See L{LogicalUnit.DeclareLocks}.
 526
 527     """
 528     raise NotImplementedError()
 529
 530   def _GetQueryData(self, lu):
 531     """Collects all data for this query.
 532
 533     @return: Query data object
 534
 535     """
 536     raise NotImplementedError()
 537
 538   def NewStyleQuery(self, lu):
 539     """Collect data and execute query.
 540
 541     """
 542     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 543                                   sort_by_name=self.sort_by_name)
 544
 545   def OldStyleQuery(self, lu):
 546     """Collect data and execute query.
 547
 548     """
 549     return self.query.OldStyleQuery(self._GetQueryData(lu),
 550                                     sort_by_name=self.sort_by_name)
 551
 552
 553 def _ShareAll():
 554   """Returns a dict declaring all lock levels shared.
 555
 556   """
 557   return dict.fromkeys(locking.LEVELS, 1)
 558
 559
 560 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
 561   """Checks if the owned node groups are still correct for an instance.
 562
 563   @type cfg: L{config.ConfigWriter}
 564   @param cfg: The cluster configuration
 565   @type instance_name: string
 566   @param instance_name: Instance name
 567   @type owned_groups: set or frozenset
 568   @param owned_groups: List of currently owned node groups
 569
 570   """
 571   inst_groups = cfg.GetInstanceNodeGroups(instance_name)
 572
 573   if not owned_groups.issuperset(inst_groups):
 574     raise errors.OpPrereqError("Instance %s's node groups changed since"
 575                                " locks were acquired, current groups are"
 576                                " are '%s', owning groups '%s'; retry the"
 577                                " operation" %
 578                                (instance_name,
 579                                 utils.CommaJoin(inst_groups),
 580                                 utils.CommaJoin(owned_groups)),
 581                                errors.ECODE_STATE)
 582
 583   return inst_groups
 584
 585
 586 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 587   """Checks if the instances in a node group are still correct.
 588
 589   @type cfg: L{config.ConfigWriter}
 590   @param cfg: The cluster configuration
 591   @type group_uuid: string
 592   @param group_uuid: Node group UUID
 593   @type owned_instances: set or frozenset
 594   @param owned_instances: List of currently owned instances
 595
 596   """
 597   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 598   if owned_instances != wanted_instances:
 599     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 600                                " locks were acquired, wanted '%s', have '%s';"
 601                                " retry the operation" %
 602                                (group_uuid,
 603                                 utils.CommaJoin(wanted_instances),
 604                                 utils.CommaJoin(owned_instances)),
 605                                errors.ECODE_STATE)
 606
 607   return wanted_instances
 608
 609
 610 def _SupportsOob(cfg, node):
 611   """Tells if node supports OOB.
 612
 613   @type cfg: L{config.ConfigWriter}
 614   @param cfg: The cluster configuration
 615   @type node: L{objects.Node}
 616   @param node: The node
 617   @return: The OOB script if supported or an empty string otherwise
 618
 619   """
 620   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 621
 622
 623 def _GetWantedNodes(lu, nodes):
 624   """Returns list of checked and expanded node names.
 625
 626   @type lu: L{LogicalUnit}
 627   @param lu: the logical unit on whose behalf we execute
 628   @type nodes: list
 629   @param nodes: list of node names or None for all nodes
 630   @rtype: list
 631   @return: the list of nodes, sorted
 632   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 633
 634   """
 635   if nodes:
 636     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 637
 638   return utils.NiceSort(lu.cfg.GetNodeList())
 639
 640
 641 def _GetWantedInstances(lu, instances):
 642   """Returns list of checked and expanded instance names.
 643
 644   @type lu: L{LogicalUnit}
 645   @param lu: the logical unit on whose behalf we execute
 646   @type instances: list
 647   @param instances: list of instance names or None for all instances
 648   @rtype: list
 649   @return: the list of instances, sorted
 650   @raise errors.OpPrereqError: if the instances parameter is wrong type
 651   @raise errors.OpPrereqError: if any of the passed instances is not found
 652
 653   """
 654   if instances:
 655     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 656   else:
 657     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 658   return wanted
 659
 660
 661 def _GetUpdatedParams(old_params, update_dict,
 662                       use_default=True, use_none=False):
 663   """Return the new version of a parameter dictionary.
 664
 665   @type old_params: dict
 666   @param old_params: old parameters
 667   @type update_dict: dict
 668   @param update_dict: dict containing new parameter values, or
 669       constants.VALUE_DEFAULT to reset the parameter to its default
 670       value
 671   @param use_default: boolean
 672   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 673       values as 'to be deleted' values
 674   @param use_none: boolean
 675   @type use_none: whether to recognise C{None} values as 'to be
 676       deleted' values
 677   @rtype: dict
 678   @return: the new parameter dictionary
 679
 680   """
 681   params_copy = copy.deepcopy(old_params)
 682   for key, val in update_dict.iteritems():
 683     if ((use_default and val == constants.VALUE_DEFAULT) or
 684         (use_none and val is None)):
 685       try:
 686         del params_copy[key]
 687       except KeyError:
 688         pass
 689     else:
 690       params_copy[key] = val
 691   return params_copy
 692
 693
 694 def _ReleaseLocks(lu, level, names=None, keep=None):
 695   """Releases locks owned by an LU.
 696
 697   @type lu: L{LogicalUnit}
 698   @param level: Lock level
 699   @type names: list or None
 700   @param names: Names of locks to release
 701   @type keep: list or None
 702   @param keep: Names of locks to retain
 703
 704   """
 705   assert not (keep is not None and names is not None), \
 706          "Only one of the 'names' and the 'keep' parameters can be given"
 707
 708   if names is not None:
 709     should_release = names.__contains__
 710   elif keep:
 711     should_release = lambda name: name not in keep
 712   else:
 713     should_release = None
 714
 715   if should_release:
 716     retain = []
 717     release = []
 718
 719     # Determine which locks to release
 720     for name in lu.owned_locks(level):
 721       if should_release(name):
 722         release.append(name)
 723       else:
 724         retain.append(name)
 725
 726     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 727
 728     # Release just some locks
 729     lu.glm.release(level, names=release)
 730
 731     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 732   else:
 733     # Release everything
 734     lu.glm.release(level)
 735
 736     assert not lu.glm.is_owned(level), "No locks should be owned"
 737
 738
 739 def _MapInstanceDisksToNodes(instances):
 740   """Creates a map from (node, volume) to instance name.
 741
 742   @type instances: list of L{objects.Instance}
 743   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 744
 745   """
 746   return dict(((node, vol), inst.name)
 747               for inst in instances
 748               for (node, vols) in inst.MapLVsByNode().items()
 749               for vol in vols)
 750
 751
 752 def _RunPostHook(lu, node_name):
 753   """Runs the post-hook for an opcode on a single node.
 754
 755   """
 756   hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
 757   try:
 758     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 759   except:
 760     # pylint: disable=W0702
 761     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 762
 763
 764 def _CheckOutputFields(static, dynamic, selected):
 765   """Checks whether all selected fields are valid.
 766
 767   @type static: L{utils.FieldSet}
 768   @param static: static fields set
 769   @type dynamic: L{utils.FieldSet}
 770   @param dynamic: dynamic fields set
 771
 772   """
 773   f = utils.FieldSet()
 774   f.Extend(static)
 775   f.Extend(dynamic)
 776
 777   delta = f.NonMatching(selected)
 778   if delta:
 779     raise errors.OpPrereqError("Unknown output fields selected: %s"
 780                                % ",".join(delta), errors.ECODE_INVAL)
 781
 782
 783 def _CheckGlobalHvParams(params):
 784   """Validates that given hypervisor params are not global ones.
 785
 786   This will ensure that instances don't get customised versions of
 787   global params.
 788
 789   """
 790   used_globals = constants.HVC_GLOBALS.intersection(params)
 791   if used_globals:
 792     msg = ("The following hypervisor parameters are global and cannot"
 793            " be customized at instance level, please modify them at"
 794            " cluster level: %s" % utils.CommaJoin(used_globals))
 795     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 796
 797
 798 def _CheckNodeOnline(lu, node, msg=None):
 799   """Ensure that a given node is online.
 800
 801   @param lu: the LU on behalf of which we make the check
 802   @param node: the node to check
 803   @param msg: if passed, should be a message to replace the default one
 804   @raise errors.OpPrereqError: if the node is offline
 805
 806   """
 807   if msg is None:
 808     msg = "Can't use offline node"
 809   if lu.cfg.GetNodeInfo(node).offline:
 810     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 811
 812
 813 def _CheckNodeNotDrained(lu, node):
 814   """Ensure that a given node is not drained.
 815
 816   @param lu: the LU on behalf of which we make the check
 817   @param node: the node to check
 818   @raise errors.OpPrereqError: if the node is drained
 819
 820   """
 821   if lu.cfg.GetNodeInfo(node).drained:
 822     raise errors.OpPrereqError("Can't use drained node %s" % node,
 823                                errors.ECODE_STATE)
 824
 825
 826 def _CheckNodeVmCapable(lu, node):
 827   """Ensure that a given node is vm capable.
 828
 829   @param lu: the LU on behalf of which we make the check
 830   @param node: the node to check
 831   @raise errors.OpPrereqError: if the node is not vm capable
 832
 833   """
 834   if not lu.cfg.GetNodeInfo(node).vm_capable:
 835     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 836                                errors.ECODE_STATE)
 837
 838
 839 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 840   """Ensure that a node supports a given OS.
 841
 842   @param lu: the LU on behalf of which we make the check
 843   @param node: the node to check
 844   @param os_name: the OS to query about
 845   @param force_variant: whether to ignore variant errors
 846   @raise errors.OpPrereqError: if the node is not supporting the OS
 847
 848   """
 849   result = lu.rpc.call_os_get(node, os_name)
 850   result.Raise("OS '%s' not in supported OS list for node %s" %
 851                (os_name, node),
 852                prereq=True, ecode=errors.ECODE_INVAL)
 853   if not force_variant:
 854     _CheckOSVariant(result.payload, os_name)
 855
 856
 857 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 858   """Ensure that a node has the given secondary ip.
 859
 860   @type lu: L{LogicalUnit}
 861   @param lu: the LU on behalf of which we make the check
 862   @type node: string
 863   @param node: the node to check
 864   @type secondary_ip: string
 865   @param secondary_ip: the ip to check
 866   @type prereq: boolean
 867   @param prereq: whether to throw a prerequisite or an execute error
 868   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 869   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 870
 871   """
 872   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 873   result.Raise("Failure checking secondary ip on node %s" % node,
 874                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 875   if not result.payload:
 876     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 877            " please fix and re-run this command" % secondary_ip)
 878     if prereq:
 879       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 880     else:
 881       raise errors.OpExecError(msg)
 882
 883
 884 def _GetClusterDomainSecret():
 885   """Reads the cluster domain secret.
 886
 887   """
 888   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 889                                strict=True)
 890
 891
 892 def _CheckInstanceDown(lu, instance, reason):
 893   """Ensure that an instance is not running."""
 894   if instance.admin_up:
 895     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 896                                (instance.name, reason), errors.ECODE_STATE)
 897
 898   pnode = instance.primary_node
 899   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 900   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 901               prereq=True, ecode=errors.ECODE_ENVIRON)
 902
 903   if instance.name in ins_l.payload:
 904     raise errors.OpPrereqError("Instance %s is running, %s" %
 905                                (instance.name, reason), errors.ECODE_STATE)
 906
 907
 908 def _ExpandItemName(fn, name, kind):
 909   """Expand an item name.
 910
 911   @param fn: the function to use for expansion
 912   @param name: requested item name
 913   @param kind: text description ('Node' or 'Instance')
 914   @return: the resolved (full) name
 915   @raise errors.OpPrereqError: if the item is not found
 916
 917   """
 918   full_name = fn(name)
 919   if full_name is None:
 920     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 921                                errors.ECODE_NOENT)
 922   return full_name
 923
 924
 925 def _ExpandNodeName(cfg, name):
 926   """Wrapper over L{_ExpandItemName} for nodes."""
 927   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 928
 929
 930 def _ExpandInstanceName(cfg, name):
 931   """Wrapper over L{_ExpandItemName} for instance."""
 932   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 933
 934
 935 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 936                           memory, vcpus, nics, disk_template, disks,
 937                           bep, hvp, hypervisor_name, tags):
 938   """Builds instance related env variables for hooks
 939
 940   This builds the hook environment from individual variables.
 941
 942   @type name: string
 943   @param name: the name of the instance
 944   @type primary_node: string
 945   @param primary_node: the name of the instance's primary node
 946   @type secondary_nodes: list
 947   @param secondary_nodes: list of secondary nodes as strings
 948   @type os_type: string
 949   @param os_type: the name of the instance's OS
 950   @type status: boolean
 951   @param status: the should_run status of the instance
 952   @type memory: string
 953   @param memory: the memory size of the instance
 954   @type vcpus: string
 955   @param vcpus: the count of VCPUs the instance has
 956   @type nics: list
 957   @param nics: list of tuples (ip, mac, mode, link) representing
 958       the NICs the instance has
 959   @type disk_template: string
 960   @param disk_template: the disk template of the instance
 961   @type disks: list
 962   @param disks: the list of (size, mode) pairs
 963   @type bep: dict
 964   @param bep: the backend parameters for the instance
 965   @type hvp: dict
 966   @param hvp: the hypervisor parameters for the instance
 967   @type hypervisor_name: string
 968   @param hypervisor_name: the hypervisor for the instance
 969   @type tags: list
 970   @param tags: list of instance tags as strings
 971   @rtype: dict
 972   @return: the hook environment for this instance
 973
 974   """
 975   if status:
 976     str_status = "up"
 977   else:
 978     str_status = "down"
 979   env = {
 980     "OP_TARGET": name,
 981     "INSTANCE_NAME": name,
 982     "INSTANCE_PRIMARY": primary_node,
 983     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 984     "INSTANCE_OS_TYPE": os_type,
 985     "INSTANCE_STATUS": str_status,
 986     "INSTANCE_MEMORY": memory,
 987     "INSTANCE_VCPUS": vcpus,
 988     "INSTANCE_DISK_TEMPLATE": disk_template,
 989     "INSTANCE_HYPERVISOR": hypervisor_name,
 990   }
 991
 992   if nics:
 993     nic_count = len(nics)
 994     for idx, (ip, mac, mode, link) in enumerate(nics):
 995       if ip is None:
 996         ip = ""
 997       env["INSTANCE_NIC%d_IP" % idx] = ip
 998       env["INSTANCE_NIC%d_MAC" % idx] = mac
 999       env["INSTANCE_NIC%d_MODE" % idx] = mode
1000       env["INSTANCE_NIC%d_LINK" % idx] = link
1001       if mode == constants.NIC_MODE_BRIDGED:
1002         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1003   else:
1004     nic_count = 0
1005
1006   env["INSTANCE_NIC_COUNT"] = nic_count
1007
1008   if disks:
1009     disk_count = len(disks)
1010     for idx, (size, mode) in enumerate(disks):
1011       env["INSTANCE_DISK%d_SIZE" % idx] = size
1012       env["INSTANCE_DISK%d_MODE" % idx] = mode
1013   else:
1014     disk_count = 0
1015
1016   env["INSTANCE_DISK_COUNT"] = disk_count
1017
1018   if not tags:
1019     tags = []
1020
1021   env["INSTANCE_TAGS"] = " ".join(tags)
1022
1023   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1024     for key, value in source.items():
1025       env["INSTANCE_%s_%s" % (kind, key)] = value
1026
1027   return env
1028
1029
1030 def _NICListToTuple(lu, nics):
1031   """Build a list of nic information tuples.
1032
1033   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1034   value in LUInstanceQueryData.
1035
1036   @type lu:  L{LogicalUnit}
1037   @param lu: the logical unit on whose behalf we execute
1038   @type nics: list of L{objects.NIC}
1039   @param nics: list of nics to convert to hooks tuples
1040
1041   """
1042   hooks_nics = []
1043   cluster = lu.cfg.GetClusterInfo()
1044   for nic in nics:
1045     ip = nic.ip
1046     mac = nic.mac
1047     filled_params = cluster.SimpleFillNIC(nic.nicparams)
1048     mode = filled_params[constants.NIC_MODE]
1049     link = filled_params[constants.NIC_LINK]
1050     hooks_nics.append((ip, mac, mode, link))
1051   return hooks_nics
1052
1053
1054 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1055   """Builds instance related env variables for hooks from an object.
1056
1057   @type lu: L{LogicalUnit}
1058   @param lu: the logical unit on whose behalf we execute
1059   @type instance: L{objects.Instance}
1060   @param instance: the instance for which we should build the
1061       environment
1062   @type override: dict
1063   @param override: dictionary with key/values that will override
1064       our values
1065   @rtype: dict
1066   @return: the hook environment dictionary
1067
1068   """
1069   cluster = lu.cfg.GetClusterInfo()
1070   bep = cluster.FillBE(instance)
1071   hvp = cluster.FillHV(instance)
1072   args = {
1073     "name": instance.name,
1074     "primary_node": instance.primary_node,
1075     "secondary_nodes": instance.secondary_nodes,
1076     "os_type": instance.os,
1077     "status": instance.admin_up,
1078     "memory": bep[constants.BE_MEMORY],
1079     "vcpus": bep[constants.BE_VCPUS],
1080     "nics": _NICListToTuple(lu, instance.nics),
1081     "disk_template": instance.disk_template,
1082     "disks": [(disk.size, disk.mode) for disk in instance.disks],
1083     "bep": bep,
1084     "hvp": hvp,
1085     "hypervisor_name": instance.hypervisor,
1086     "tags": instance.tags,
1087   }
1088   if override:
1089     args.update(override)
1090   return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1091
1092
1093 def _AdjustCandidatePool(lu, exceptions):
1094   """Adjust the candidate pool after node operations.
1095
1096   """
1097   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1098   if mod_list:
1099     lu.LogInfo("Promoted nodes to master candidate role: %s",
1100                utils.CommaJoin(node.name for node in mod_list))
1101     for name in mod_list:
1102       lu.context.ReaddNode(name)
1103   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1104   if mc_now > mc_max:
1105     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1106                (mc_now, mc_max))
1107
1108
1109 def _DecideSelfPromotion(lu, exceptions=None):
1110   """Decide whether I should promote myself as a master candidate.
1111
1112   """
1113   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1114   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1115   # the new node will increase mc_max with one, so:
1116   mc_should = min(mc_should + 1, cp_size)
1117   return mc_now < mc_should
1118
1119
1120 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1121   """Check that the brigdes needed by a list of nics exist.
1122
1123   """
1124   cluster = lu.cfg.GetClusterInfo()
1125   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1126   brlist = [params[constants.NIC_LINK] for params in paramslist
1127             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1128   if brlist:
1129     result = lu.rpc.call_bridges_exist(target_node, brlist)
1130     result.Raise("Error checking bridges on destination node '%s'" %
1131                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1132
1133
1134 def _CheckInstanceBridgesExist(lu, instance, node=None):
1135   """Check that the brigdes needed by an instance exist.
1136
1137   """
1138   if node is None:
1139     node = instance.primary_node
1140   _CheckNicsBridgesExist(lu, instance.nics, node)
1141
1142
1143 def _CheckOSVariant(os_obj, name):
1144   """Check whether an OS name conforms to the os variants specification.
1145
1146   @type os_obj: L{objects.OS}
1147   @param os_obj: OS object to check
1148   @type name: string
1149   @param name: OS name passed by the user, to check for validity
1150
1151   """
1152   variant = objects.OS.GetVariant(name)
1153   if not os_obj.supported_variants:
1154     if variant:
1155       raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1156                                  " passed)" % (os_obj.name, variant),
1157                                  errors.ECODE_INVAL)
1158     return
1159   if not variant:
1160     raise errors.OpPrereqError("OS name must include a variant",
1161                                errors.ECODE_INVAL)
1162
1163   if variant not in os_obj.supported_variants:
1164     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1165
1166
1167 def _GetNodeInstancesInner(cfg, fn):
1168   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1169
1170
1171 def _GetNodeInstances(cfg, node_name):
1172   """Returns a list of all primary and secondary instances on a node.
1173
1174   """
1175
1176   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1177
1178
1179 def _GetNodePrimaryInstances(cfg, node_name):
1180   """Returns primary instances on a node.
1181
1182   """
1183   return _GetNodeInstancesInner(cfg,
1184                                 lambda inst: node_name == inst.primary_node)
1185
1186
1187 def _GetNodeSecondaryInstances(cfg, node_name):
1188   """Returns secondary instances on a node.
1189
1190   """
1191   return _GetNodeInstancesInner(cfg,
1192                                 lambda inst: node_name in inst.secondary_nodes)
1193
1194
1195 def _GetStorageTypeArgs(cfg, storage_type):
1196   """Returns the arguments for a storage type.
1197
1198   """
1199   # Special case for file storage
1200   if storage_type == constants.ST_FILE:
1201     # storage.FileStorage wants a list of storage directories
1202     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1203
1204   return []
1205
1206
1207 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1208   faulty = []
1209
1210   for dev in instance.disks:
1211     cfg.SetDiskID(dev, node_name)
1212
1213   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1214   result.Raise("Failed to get disk status from node %s" % node_name,
1215                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1216
1217   for idx, bdev_status in enumerate(result.payload):
1218     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1219       faulty.append(idx)
1220
1221   return faulty
1222
1223
1224 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1225   """Check the sanity of iallocator and node arguments and use the
1226   cluster-wide iallocator if appropriate.
1227
1228   Check that at most one of (iallocator, node) is specified. If none is
1229   specified, then the LU's opcode's iallocator slot is filled with the
1230   cluster-wide default iallocator.
1231
1232   @type iallocator_slot: string
1233   @param iallocator_slot: the name of the opcode iallocator slot
1234   @type node_slot: string
1235   @param node_slot: the name of the opcode target node slot
1236
1237   """
1238   node = getattr(lu.op, node_slot, None)
1239   iallocator = getattr(lu.op, iallocator_slot, None)
1240
1241   if node is not None and iallocator is not None:
1242     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1243                                errors.ECODE_INVAL)
1244   elif node is None and iallocator is None:
1245     default_iallocator = lu.cfg.GetDefaultIAllocator()
1246     if default_iallocator:
1247       setattr(lu.op, iallocator_slot, default_iallocator)
1248     else:
1249       raise errors.OpPrereqError("No iallocator or node given and no"
1250                                  " cluster-wide default iallocator found;"
1251                                  " please specify either an iallocator or a"
1252                                  " node, or set a cluster-wide default"
1253                                  " iallocator")
1254
1255
1256 def _GetDefaultIAllocator(cfg, iallocator):
1257   """Decides on which iallocator to use.
1258
1259   @type cfg: L{config.ConfigWriter}
1260   @param cfg: Cluster configuration object
1261   @type iallocator: string or None
1262   @param iallocator: Iallocator specified in opcode
1263   @rtype: string
1264   @return: Iallocator name
1265
1266   """
1267   if not iallocator:
1268     # Use default iallocator
1269     iallocator = cfg.GetDefaultIAllocator()
1270
1271   if not iallocator:
1272     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1273                                " opcode nor as a cluster-wide default",
1274                                errors.ECODE_INVAL)
1275
1276   return iallocator
1277
1278
1279 class LUClusterPostInit(LogicalUnit):
1280   """Logical unit for running hooks after cluster initialization.
1281
1282   """
1283   HPATH = "cluster-init"
1284   HTYPE = constants.HTYPE_CLUSTER
1285
1286   def BuildHooksEnv(self):
1287     """Build hooks env.
1288
1289     """
1290     return {
1291       "OP_TARGET": self.cfg.GetClusterName(),
1292       }
1293
1294   def BuildHooksNodes(self):
1295     """Build hooks nodes.
1296
1297     """
1298     return ([], [self.cfg.GetMasterNode()])
1299
1300   def Exec(self, feedback_fn):
1301     """Nothing to do.
1302
1303     """
1304     return True
1305
1306
1307 class LUClusterDestroy(LogicalUnit):
1308   """Logical unit for destroying the cluster.
1309
1310   """
1311   HPATH = "cluster-destroy"
1312   HTYPE = constants.HTYPE_CLUSTER
1313
1314   def BuildHooksEnv(self):
1315     """Build hooks env.
1316
1317     """
1318     return {
1319       "OP_TARGET": self.cfg.GetClusterName(),
1320       }
1321
1322   def BuildHooksNodes(self):
1323     """Build hooks nodes.
1324
1325     """
1326     return ([], [])
1327
1328   def CheckPrereq(self):
1329     """Check prerequisites.
1330
1331     This checks whether the cluster is empty.
1332
1333     Any errors are signaled by raising errors.OpPrereqError.
1334
1335     """
1336     master = self.cfg.GetMasterNode()
1337
1338     nodelist = self.cfg.GetNodeList()
1339     if len(nodelist) != 1 or nodelist[0] != master:
1340       raise errors.OpPrereqError("There are still %d node(s) in"
1341                                  " this cluster." % (len(nodelist) - 1),
1342                                  errors.ECODE_INVAL)
1343     instancelist = self.cfg.GetInstanceList()
1344     if instancelist:
1345       raise errors.OpPrereqError("There are still %d instance(s) in"
1346                                  " this cluster." % len(instancelist),
1347                                  errors.ECODE_INVAL)
1348
1349   def Exec(self, feedback_fn):
1350     """Destroys the cluster.
1351
1352     """
1353     master = self.cfg.GetMasterNode()
1354
1355     # Run post hooks on master node before it's removed
1356     _RunPostHook(self, master)
1357
1358     result = self.rpc.call_node_stop_master(master, False)
1359     result.Raise("Could not disable the master role")
1360
1361     return master
1362
1363
1364 def _VerifyCertificate(filename):
1365   """Verifies a certificate for L{LUClusterVerifyConfig}.
1366
1367   @type filename: string
1368   @param filename: Path to PEM file
1369
1370   """
1371   try:
1372     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1373                                            utils.ReadFile(filename))
1374   except Exception, err: # pylint: disable=W0703
1375     return (LUClusterVerifyConfig.ETYPE_ERROR,
1376             "Failed to load X509 certificate %s: %s" % (filename, err))
1377
1378   (errcode, msg) = \
1379     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1380                                 constants.SSL_CERT_EXPIRATION_ERROR)
1381
1382   if msg:
1383     fnamemsg = "While verifying %s: %s" % (filename, msg)
1384   else:
1385     fnamemsg = None
1386
1387   if errcode is None:
1388     return (None, fnamemsg)
1389   elif errcode == utils.CERT_WARNING:
1390     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1391   elif errcode == utils.CERT_ERROR:
1392     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1393
1394   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1395
1396
1397 def _GetAllHypervisorParameters(cluster, instances):
1398   """Compute the set of all hypervisor parameters.
1399
1400   @type cluster: L{objects.Cluster}
1401   @param cluster: the cluster object
1402   @param instances: list of L{objects.Instance}
1403   @param instances: additional instances from which to obtain parameters
1404   @rtype: list of (origin, hypervisor, parameters)
1405   @return: a list with all parameters found, indicating the hypervisor they
1406        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1407
1408   """
1409   hvp_data = []
1410
1411   for hv_name in cluster.enabled_hypervisors:
1412     hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1413
1414   for os_name, os_hvp in cluster.os_hvp.items():
1415     for hv_name, hv_params in os_hvp.items():
1416       if hv_params:
1417         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1418         hvp_data.append(("os %s" % os_name, hv_name, full_params))
1419
1420   # TODO: collapse identical parameter values in a single one
1421   for instance in instances:
1422     if instance.hvparams:
1423       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1424                        cluster.FillHV(instance)))
1425
1426   return hvp_data
1427
1428
1429 class _VerifyErrors(object):
1430   """Mix-in for cluster/group verify LUs.
1431
1432   It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1433   self.op and self._feedback_fn to be available.)
1434
1435   """
1436   TCLUSTER = "cluster"
1437   TNODE = "node"
1438   TINSTANCE = "instance"
1439
1440   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1441   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1442   ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1443   ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1444   ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1445   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1446   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1447   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1448   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1449   EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1450   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1451   EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1452   ENODEDRBD = (TNODE, "ENODEDRBD")
1453   ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1454   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1455   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1456   ENODEHV = (TNODE, "ENODEHV")
1457   ENODELVM = (TNODE, "ENODELVM")
1458   ENODEN1 = (TNODE, "ENODEN1")
1459   ENODENET = (TNODE, "ENODENET")
1460   ENODEOS = (TNODE, "ENODEOS")
1461   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1462   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1463   ENODERPC = (TNODE, "ENODERPC")
1464   ENODESSH = (TNODE, "ENODESSH")
1465   ENODEVERSION = (TNODE, "ENODEVERSION")
1466   ENODESETUP = (TNODE, "ENODESETUP")
1467   ENODETIME = (TNODE, "ENODETIME")
1468   ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1469
1470   ETYPE_FIELD = "code"
1471   ETYPE_ERROR = "ERROR"
1472   ETYPE_WARNING = "WARNING"
1473
1474   def _Error(self, ecode, item, msg, *args, **kwargs):
1475     """Format an error message.
1476
1477     Based on the opcode's error_codes parameter, either format a
1478     parseable error code, or a simpler error string.
1479
1480     This must be called only from Exec and functions called from Exec.
1481
1482     """
1483     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1484     itype, etxt = ecode
1485     # first complete the msg
1486     if args:
1487       msg = msg % args
1488     # then format the whole message
1489     if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1490       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1491     else:
1492       if item:
1493         item = " " + item
1494       else:
1495         item = ""
1496       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1497     # and finally report it via the feedback_fn
1498     self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1499
1500   def _ErrorIf(self, cond, *args, **kwargs):
1501     """Log an error message if the passed condition is True.
1502
1503     """
1504     cond = (bool(cond)
1505             or self.op.debug_simulate_errors) # pylint: disable=E1101
1506     if cond:
1507       self._Error(*args, **kwargs)
1508     # do not mark the operation as failed for WARN cases only
1509     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1510       self.bad = self.bad or cond
1511
1512
1513 class LUClusterVerify(NoHooksLU):
1514   """Submits all jobs necessary to verify the cluster.
1515
1516   """
1517   REQ_BGL = False
1518
1519   def ExpandNames(self):
1520     self.needed_locks = {}
1521
1522   def Exec(self, feedback_fn):
1523     jobs = []
1524
1525     if self.op.group_name:
1526       groups = [self.op.group_name]
1527       depends_fn = lambda: None
1528     else:
1529       groups = self.cfg.GetNodeGroupList()
1530
1531       # Verify global configuration
1532       jobs.append([opcodes.OpClusterVerifyConfig()])
1533
1534       # Always depend on global verification
1535       depends_fn = lambda: [(-len(jobs), [])]
1536
1537     jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1538                                               depends=depends_fn())]
1539                 for group in groups)
1540
1541     # Fix up all parameters
1542     for op in itertools.chain(*jobs): # pylint: disable=W0142
1543       op.debug_simulate_errors = self.op.debug_simulate_errors
1544       op.verbose = self.op.verbose
1545       op.error_codes = self.op.error_codes
1546       try:
1547         op.skip_checks = self.op.skip_checks
1548       except AttributeError:
1549         assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1550
1551     return ResultWithJobs(jobs)
1552
1553
1554 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1555   """Verifies the cluster config.
1556
1557   """
1558   REQ_BGL = True
1559
1560   def _VerifyHVP(self, hvp_data):
1561     """Verifies locally the syntax of the hypervisor parameters.
1562
1563     """
1564     for item, hv_name, hv_params in hvp_data:
1565       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1566              (item, hv_name))
1567       try:
1568         hv_class = hypervisor.GetHypervisor(hv_name)
1569         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1570         hv_class.CheckParameterSyntax(hv_params)
1571       except errors.GenericError, err:
1572         self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1573
1574   def ExpandNames(self):
1575     # Information can be safely retrieved as the BGL is acquired in exclusive
1576     # mode
1577     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1578     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1579     self.all_node_info = self.cfg.GetAllNodesInfo()
1580     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1581     self.needed_locks = {}
1582
1583   def Exec(self, feedback_fn):
1584     """Verify integrity of cluster, performing various test on nodes.
1585
1586     """
1587     self.bad = False
1588     self._feedback_fn = feedback_fn
1589
1590     feedback_fn("* Verifying cluster config")
1591
1592     for msg in self.cfg.VerifyConfig():
1593       self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1594
1595     feedback_fn("* Verifying cluster certificate files")
1596
1597     for cert_filename in constants.ALL_CERT_FILES:
1598       (errcode, msg) = _VerifyCertificate(cert_filename)
1599       self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1600
1601     feedback_fn("* Verifying hypervisor parameters")
1602
1603     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1604                                                 self.all_inst_info.values()))
1605
1606     feedback_fn("* Verifying all nodes belong to an existing group")
1607
1608     # We do this verification here because, should this bogus circumstance
1609     # occur, it would never be caught by VerifyGroup, which only acts on
1610     # nodes/instances reachable from existing node groups.
1611
1612     dangling_nodes = set(node.name for node in self.all_node_info.values()
1613                          if node.group not in self.all_group_info)
1614
1615     dangling_instances = {}
1616     no_node_instances = []
1617
1618     for inst in self.all_inst_info.values():
1619       if inst.primary_node in dangling_nodes:
1620         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1621       elif inst.primary_node not in self.all_node_info:
1622         no_node_instances.append(inst.name)
1623
1624     pretty_dangling = [
1625         "%s (%s)" %
1626         (node.name,
1627          utils.CommaJoin(dangling_instances.get(node.name,
1628                                                 ["no instances"])))
1629         for node in dangling_nodes]
1630
1631     self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1632                   "the following nodes (and their instances) belong to a non"
1633                   " existing group: %s", utils.CommaJoin(pretty_dangling))
1634
1635     self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1636                   "the following instances have a non-existing primary-node:"
1637                   " %s", utils.CommaJoin(no_node_instances))
1638
1639     return not self.bad
1640
1641
1642 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1643   """Verifies the status of a node group.
1644
1645   """
1646   HPATH = "cluster-verify"
1647   HTYPE = constants.HTYPE_CLUSTER
1648   REQ_BGL = False
1649
1650   _HOOKS_INDENT_RE = re.compile("^", re.M)
1651
1652   class NodeImage(object):
1653     """A class representing the logical and physical status of a node.
1654
1655     @type name: string
1656     @ivar name: the node name to which this object refers
1657     @ivar volumes: a structure as returned from
1658         L{ganeti.backend.GetVolumeList} (runtime)
1659     @ivar instances: a list of running instances (runtime)
1660     @ivar pinst: list of configured primary instances (config)
1661     @ivar sinst: list of configured secondary instances (config)
1662     @ivar sbp: dictionary of {primary-node: list of instances} for all
1663         instances for which this node is secondary (config)
1664     @ivar mfree: free memory, as reported by hypervisor (runtime)
1665     @ivar dfree: free disk, as reported by the node (runtime)
1666     @ivar offline: the offline status (config)
1667     @type rpc_fail: boolean
1668     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1669         not whether the individual keys were correct) (runtime)
1670     @type lvm_fail: boolean
1671     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1672     @type hyp_fail: boolean
1673     @ivar hyp_fail: whether the RPC call didn't return the instance list
1674     @type ghost: boolean
1675     @ivar ghost: whether this is a known node or not (config)
1676     @type os_fail: boolean
1677     @ivar os_fail: whether the RPC call didn't return valid OS data
1678     @type oslist: list
1679     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1680     @type vm_capable: boolean
1681     @ivar vm_capable: whether the node can host instances
1682
1683     """
1684     def __init__(self, offline=False, name=None, vm_capable=True):
1685       self.name = name
1686       self.volumes = {}
1687       self.instances = []
1688       self.pinst = []
1689       self.sinst = []
1690       self.sbp = {}
1691       self.mfree = 0
1692       self.dfree = 0
1693       self.offline = offline
1694       self.vm_capable = vm_capable
1695       self.rpc_fail = False
1696       self.lvm_fail = False
1697       self.hyp_fail = False
1698       self.ghost = False
1699       self.os_fail = False
1700       self.oslist = {}
1701
1702   def ExpandNames(self):
1703     # This raises errors.OpPrereqError on its own:
1704     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1705
1706     # Get instances in node group; this is unsafe and needs verification later
1707     inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1708
1709     self.needed_locks = {
1710       locking.LEVEL_INSTANCE: inst_names,
1711       locking.LEVEL_NODEGROUP: [self.group_uuid],
1712       locking.LEVEL_NODE: [],
1713       }
1714
1715     self.share_locks = _ShareAll()
1716
1717   def DeclareLocks(self, level):
1718     if level == locking.LEVEL_NODE:
1719       # Get members of node group; this is unsafe and needs verification later
1720       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1721
1722       all_inst_info = self.cfg.GetAllInstancesInfo()
1723
1724       # In Exec(), we warn about mirrored instances that have primary and
1725       # secondary living in separate node groups. To fully verify that
1726       # volumes for these instances are healthy, we will need to do an
1727       # extra call to their secondaries. We ensure here those nodes will
1728       # be locked.
1729       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1730         # Important: access only the instances whose lock is owned
1731         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1732           nodes.update(all_inst_info[inst].secondary_nodes)
1733
1734       self.needed_locks[locking.LEVEL_NODE] = nodes
1735
1736   def CheckPrereq(self):
1737     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1738     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1739
1740     group_nodes = set(self.group_info.members)
1741     group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1742
1743     unlocked_nodes = \
1744         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1745
1746     unlocked_instances = \
1747         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1748
1749     if unlocked_nodes:
1750       raise errors.OpPrereqError("Missing lock for nodes: %s" %
1751                                  utils.CommaJoin(unlocked_nodes))
1752
1753     if unlocked_instances:
1754       raise errors.OpPrereqError("Missing lock for instances: %s" %
1755                                  utils.CommaJoin(unlocked_instances))
1756
1757     self.all_node_info = self.cfg.GetAllNodesInfo()
1758     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1759
1760     self.my_node_names = utils.NiceSort(group_nodes)
1761     self.my_inst_names = utils.NiceSort(group_instances)
1762
1763     self.my_node_info = dict((name, self.all_node_info[name])
1764                              for name in self.my_node_names)
1765
1766     self.my_inst_info = dict((name, self.all_inst_info[name])
1767                              for name in self.my_inst_names)
1768
1769     # We detect here the nodes that will need the extra RPC calls for verifying
1770     # split LV volumes; they should be locked.
1771     extra_lv_nodes = set()
1772
1773     for inst in self.my_inst_info.values():
1774       if inst.disk_template in constants.DTS_INT_MIRROR:
1775         group = self.my_node_info[inst.primary_node].group
1776         for nname in inst.secondary_nodes:
1777           if self.all_node_info[nname].group != group:
1778             extra_lv_nodes.add(nname)
1779
1780     unlocked_lv_nodes = \
1781         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1782
1783     if unlocked_lv_nodes:
1784       raise errors.OpPrereqError("these nodes could be locked: %s" %
1785                                  utils.CommaJoin(unlocked_lv_nodes))
1786     self.extra_lv_nodes = list(extra_lv_nodes)
1787
1788   def _VerifyNode(self, ninfo, nresult):
1789     """Perform some basic validation on data returned from a node.
1790
1791       - check the result data structure is well formed and has all the
1792         mandatory fields
1793       - check ganeti version
1794
1795     @type ninfo: L{objects.Node}
1796     @param ninfo: the node to check
1797     @param nresult: the results from the node
1798     @rtype: boolean
1799     @return: whether overall this call was successful (and we can expect
1800          reasonable values in the respose)
1801
1802     """
1803     node = ninfo.name
1804     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1805
1806     # main result, nresult should be a non-empty dict
1807     test = not nresult or not isinstance(nresult, dict)
1808     _ErrorIf(test, self.ENODERPC, node,
1809                   "unable to verify node: no data returned")
1810     if test:
1811       return False
1812
1813     # compares ganeti version
1814     local_version = constants.PROTOCOL_VERSION
1815     remote_version = nresult.get("version", None)
1816     test = not (remote_version and
1817                 isinstance(remote_version, (list, tuple)) and
1818                 len(remote_version) == 2)
1819     _ErrorIf(test, self.ENODERPC, node,
1820              "connection to node returned invalid data")
1821     if test:
1822       return False
1823
1824     test = local_version != remote_version[0]
1825     _ErrorIf(test, self.ENODEVERSION, node,
1826              "incompatible protocol versions: master %s,"
1827              " node %s", local_version, remote_version[0])
1828     if test:
1829       return False
1830
1831     # node seems compatible, we can actually try to look into its results
1832
1833     # full package version
1834     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1835                   self.ENODEVERSION, node,
1836                   "software version mismatch: master %s, node %s",
1837                   constants.RELEASE_VERSION, remote_version[1],
1838                   code=self.ETYPE_WARNING)
1839
1840     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1841     if ninfo.vm_capable and isinstance(hyp_result, dict):
1842       for hv_name, hv_result in hyp_result.iteritems():
1843         test = hv_result is not None
1844         _ErrorIf(test, self.ENODEHV, node,
1845                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1846
1847     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1848     if ninfo.vm_capable and isinstance(hvp_result, list):
1849       for item, hv_name, hv_result in hvp_result:
1850         _ErrorIf(True, self.ENODEHV, node,
1851                  "hypervisor %s parameter verify failure (source %s): %s",
1852                  hv_name, item, hv_result)
1853
1854     test = nresult.get(constants.NV_NODESETUP,
1855                        ["Missing NODESETUP results"])
1856     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1857              "; ".join(test))
1858
1859     return True
1860
1861   def _VerifyNodeTime(self, ninfo, nresult,
1862                       nvinfo_starttime, nvinfo_endtime):
1863     """Check the node time.
1864
1865     @type ninfo: L{objects.Node}
1866     @param ninfo: the node to check
1867     @param nresult: the remote results for the node
1868     @param nvinfo_starttime: the start time of the RPC call
1869     @param nvinfo_endtime: the end time of the RPC call
1870
1871     """
1872     node = ninfo.name
1873     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1874
1875     ntime = nresult.get(constants.NV_TIME, None)
1876     try:
1877       ntime_merged = utils.MergeTime(ntime)
1878     except (ValueError, TypeError):
1879       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1880       return
1881
1882     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1883       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1884     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1885       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1886     else:
1887       ntime_diff = None
1888
1889     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1890              "Node time diverges by at least %s from master node time",
1891              ntime_diff)
1892
1893   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1894     """Check the node LVM results.
1895
1896     @type ninfo: L{objects.Node}
1897     @param ninfo: the node to check
1898     @param nresult: the remote results for the node
1899     @param vg_name: the configured VG name
1900
1901     """
1902     if vg_name is None:
1903       return
1904
1905     node = ninfo.name
1906     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1907
1908     # checks vg existence and size > 20G
1909     vglist = nresult.get(constants.NV_VGLIST, None)
1910     test = not vglist
1911     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1912     if not test:
1913       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1914                                             constants.MIN_VG_SIZE)
1915       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1916
1917     # check pv names
1918     pvlist = nresult.get(constants.NV_PVLIST, None)
1919     test = pvlist is None
1920     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1921     if not test:
1922       # check that ':' is not present in PV names, since it's a
1923       # special character for lvcreate (denotes the range of PEs to
1924       # use on the PV)
1925       for _, pvname, owner_vg in pvlist:
1926         test = ":" in pvname
1927         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1928                  " '%s' of VG '%s'", pvname, owner_vg)
1929
1930   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1931     """Check the node bridges.
1932
1933     @type ninfo: L{objects.Node}
1934     @param ninfo: the node to check
1935     @param nresult: the remote results for the node
1936     @param bridges: the expected list of bridges
1937
1938     """
1939     if not bridges:
1940       return
1941
1942     node = ninfo.name
1943     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1944
1945     missing = nresult.get(constants.NV_BRIDGES, None)
1946     test = not isinstance(missing, list)
1947     _ErrorIf(test, self.ENODENET, node,
1948              "did not return valid bridge information")
1949     if not test:
1950       _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1951                utils.CommaJoin(sorted(missing)))
1952
1953   def _VerifyNodeNetwork(self, ninfo, nresult):
1954     """Check the node network connectivity results.
1955
1956     @type ninfo: L{objects.Node}
1957     @param ninfo: the node to check
1958     @param nresult: the remote results for the node
1959
1960     """
1961     node = ninfo.name
1962     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1963
1964     test = constants.NV_NODELIST not in nresult
1965     _ErrorIf(test, self.ENODESSH, node,
1966              "node hasn't returned node ssh connectivity data")
1967     if not test:
1968       if nresult[constants.NV_NODELIST]:
1969         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1970           _ErrorIf(True, self.ENODESSH, node,
1971                    "ssh communication with node '%s': %s", a_node, a_msg)
1972
1973     test = constants.NV_NODENETTEST not in nresult
1974     _ErrorIf(test, self.ENODENET, node,
1975              "node hasn't returned node tcp connectivity data")
1976     if not test:
1977       if nresult[constants.NV_NODENETTEST]:
1978         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1979         for anode in nlist:
1980           _ErrorIf(True, self.ENODENET, node,
1981                    "tcp communication with node '%s': %s",
1982                    anode, nresult[constants.NV_NODENETTEST][anode])
1983
1984     test = constants.NV_MASTERIP not in nresult
1985     _ErrorIf(test, self.ENODENET, node,
1986              "node hasn't returned node master IP reachability data")
1987     if not test:
1988       if not nresult[constants.NV_MASTERIP]:
1989         if node == self.master_node:
1990           msg = "the master node cannot reach the master IP (not configured?)"
1991         else:
1992           msg = "cannot reach the master IP"
1993         _ErrorIf(True, self.ENODENET, node, msg)
1994
1995   def _VerifyInstance(self, instance, instanceconfig, node_image,
1996                       diskstatus):
1997     """Verify an instance.
1998
1999     This function checks to see if the required block devices are
2000     available on the instance's node.
2001
2002     """
2003     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2004     node_current = instanceconfig.primary_node
2005
2006     node_vol_should = {}
2007     instanceconfig.MapLVsByNode(node_vol_should)
2008
2009     for node in node_vol_should:
2010       n_img = node_image[node]
2011       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2012         # ignore missing volumes on offline or broken nodes
2013         continue
2014       for volume in node_vol_should[node]:
2015         test = volume not in n_img.volumes
2016         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
2017                  "volume %s missing on node %s", volume, node)
2018
2019     if instanceconfig.admin_up:
2020       pri_img = node_image[node_current]
2021       test = instance not in pri_img.instances and not pri_img.offline
2022       _ErrorIf(test, self.EINSTANCEDOWN, instance,
2023                "instance not running on its primary node %s",
2024                node_current)
2025
2026     diskdata = [(nname, success, status, idx)
2027                 for (nname, disks) in diskstatus.items()
2028                 for idx, (success, status) in enumerate(disks)]
2029
2030     for nname, success, bdev_status, idx in diskdata:
2031       # the 'ghost node' construction in Exec() ensures that we have a
2032       # node here
2033       snode = node_image[nname]
2034       bad_snode = snode.ghost or snode.offline
2035       _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2036                self.EINSTANCEFAULTYDISK, instance,
2037                "couldn't retrieve status for disk/%s on %s: %s",
2038                idx, nname, bdev_status)
2039       _ErrorIf((instanceconfig.admin_up and success and
2040                 bdev_status.ldisk_status == constants.LDS_FAULTY),
2041                self.EINSTANCEFAULTYDISK, instance,
2042                "disk/%s on %s is faulty", idx, nname)
2043
2044   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2045     """Verify if there are any unknown volumes in the cluster.
2046
2047     The .os, .swap and backup volumes are ignored. All other volumes are
2048     reported as unknown.
2049
2050     @type reserved: L{ganeti.utils.FieldSet}
2051     @param reserved: a FieldSet of reserved volume names
2052
2053     """
2054     for node, n_img in node_image.items():
2055       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2056         # skip non-healthy nodes
2057         continue
2058       for volume in n_img.volumes:
2059         test = ((node not in node_vol_should or
2060                 volume not in node_vol_should[node]) and
2061                 not reserved.Matches(volume))
2062         self._ErrorIf(test, self.ENODEORPHANLV, node,
2063                       "volume %s is unknown", volume)
2064
2065   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2066     """Verify N+1 Memory Resilience.
2067
2068     Check that if one single node dies we can still start all the
2069     instances it was primary for.
2070
2071     """
2072     cluster_info = self.cfg.GetClusterInfo()
2073     for node, n_img in node_image.items():
2074       # This code checks that every node which is now listed as
2075       # secondary has enough memory to host all instances it is
2076       # supposed to should a single other node in the cluster fail.
2077       # FIXME: not ready for failover to an arbitrary node
2078       # FIXME: does not support file-backed instances
2079       # WARNING: we currently take into account down instances as well
2080       # as up ones, considering that even if they're down someone
2081       # might want to start them even in the event of a node failure.
2082       if n_img.offline:
2083         # we're skipping offline nodes from the N+1 warning, since
2084         # most likely we don't have good memory infromation from them;
2085         # we already list instances living on such nodes, and that's
2086         # enough warning
2087         continue
2088       for prinode, instances in n_img.sbp.items():
2089         needed_mem = 0
2090         for instance in instances:
2091           bep = cluster_info.FillBE(instance_cfg[instance])
2092           if bep[constants.BE_AUTO_BALANCE]:
2093             needed_mem += bep[constants.BE_MEMORY]
2094         test = n_img.mfree < needed_mem
2095         self._ErrorIf(test, self.ENODEN1, node,
2096                       "not enough memory to accomodate instance failovers"
2097                       " should node %s fail (%dMiB needed, %dMiB available)",
2098                       prinode, needed_mem, n_img.mfree)
2099
2100   @classmethod
2101   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2102                    (files_all, files_all_opt, files_mc, files_vm)):
2103     """Verifies file checksums collected from all nodes.
2104
2105     @param errorif: Callback for reporting errors
2106     @param nodeinfo: List of L{objects.Node} objects
2107     @param master_node: Name of master node
2108     @param all_nvinfo: RPC results
2109
2110     """
2111     assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2112             sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2113            "Found file listed in more than one file list"
2114
2115     # Define functions determining which nodes to consider for a file
2116     files2nodefn = [
2117       (files_all, None),
2118       (files_all_opt, None),
2119       (files_mc, lambda node: (node.master_candidate or
2120                                node.name == master_node)),
2121       (files_vm, lambda node: node.vm_capable),
2122       ]
2123
2124     # Build mapping from filename to list of nodes which should have the file
2125     nodefiles = {}
2126     for (files, fn) in files2nodefn:
2127       if fn is None:
2128         filenodes = nodeinfo
2129       else:
2130         filenodes = filter(fn, nodeinfo)
2131       nodefiles.update((filename,
2132                         frozenset(map(operator.attrgetter("name"), filenodes)))
2133                        for filename in files)
2134
2135     assert set(nodefiles) == (files_all | files_all_opt | files_mc | files_vm)
2136
2137     fileinfo = dict((filename, {}) for filename in nodefiles)
2138     ignore_nodes = set()
2139
2140     for node in nodeinfo:
2141       if node.offline:
2142         ignore_nodes.add(node.name)
2143         continue
2144
2145       nresult = all_nvinfo[node.name]
2146
2147       if nresult.fail_msg or not nresult.payload:
2148         node_files = None
2149       else:
2150         node_files = nresult.payload.get(constants.NV_FILELIST, None)
2151
2152       test = not (node_files and isinstance(node_files, dict))
2153       errorif(test, cls.ENODEFILECHECK, node.name,
2154               "Node did not return file checksum data")
2155       if test:
2156         ignore_nodes.add(node.name)
2157         continue
2158
2159       # Build per-checksum mapping from filename to nodes having it
2160       for (filename, checksum) in node_files.items():
2161         assert filename in nodefiles
2162         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2163
2164     for (filename, checksums) in fileinfo.items():
2165       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2166
2167       # Nodes having the file
2168       with_file = frozenset(node_name
2169                             for nodes in fileinfo[filename].values()
2170                             for node_name in nodes) - ignore_nodes
2171
2172       expected_nodes = nodefiles[filename] - ignore_nodes
2173
2174       # Nodes missing file
2175       missing_file = expected_nodes - with_file
2176
2177       if filename in files_all_opt:
2178         # All or no nodes
2179         errorif(missing_file and missing_file != expected_nodes,
2180                 cls.ECLUSTERFILECHECK, None,
2181                 "File %s is optional, but it must exist on all or no"
2182                 " nodes (not found on %s)",
2183                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2184       else:
2185         # Non-optional files
2186         errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2187                 "File %s is missing from node(s) %s", filename,
2188                 utils.CommaJoin(utils.NiceSort(missing_file)))
2189
2190         # Warn if a node has a file it shouldn't
2191         unexpected = with_file - expected_nodes
2192         errorif(unexpected,
2193                 cls.ECLUSTERFILECHECK, None,
2194                 "File %s should not exist on node(s) %s",
2195                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2196
2197       # See if there are multiple versions of the file
2198       test = len(checksums) > 1
2199       if test:
2200         variants = ["variant %s on %s" %
2201                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2202                     for (idx, (checksum, nodes)) in
2203                       enumerate(sorted(checksums.items()))]
2204       else:
2205         variants = []
2206
2207       errorif(test, cls.ECLUSTERFILECHECK, None,
2208               "File %s found with %s different checksums (%s)",
2209               filename, len(checksums), "; ".join(variants))
2210
2211   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2212                       drbd_map):
2213     """Verifies and the node DRBD status.
2214
2215     @type ninfo: L{objects.Node}
2216     @param ninfo: the node to check
2217     @param nresult: the remote results for the node
2218     @param instanceinfo: the dict of instances
2219     @param drbd_helper: the configured DRBD usermode helper
2220     @param drbd_map: the DRBD map as returned by
2221         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2222
2223     """
2224     node = ninfo.name
2225     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2226
2227     if drbd_helper:
2228       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2229       test = (helper_result == None)
2230       _ErrorIf(test, self.ENODEDRBDHELPER, node,
2231                "no drbd usermode helper returned")
2232       if helper_result:
2233         status, payload = helper_result
2234         test = not status
2235         _ErrorIf(test, self.ENODEDRBDHELPER, node,
2236                  "drbd usermode helper check unsuccessful: %s", payload)
2237         test = status and (payload != drbd_helper)
2238         _ErrorIf(test, self.ENODEDRBDHELPER, node,
2239                  "wrong drbd usermode helper: %s", payload)
2240
2241     # compute the DRBD minors
2242     node_drbd = {}
2243     for minor, instance in drbd_map[node].items():
2244       test = instance not in instanceinfo
2245       _ErrorIf(test, self.ECLUSTERCFG, None,
2246                "ghost instance '%s' in temporary DRBD map", instance)
2247         # ghost instance should not be running, but otherwise we
2248         # don't give double warnings (both ghost instance and
2249         # unallocated minor in use)
2250       if test:
2251         node_drbd[minor] = (instance, False)
2252       else:
2253         instance = instanceinfo[instance]
2254         node_drbd[minor] = (instance.name, instance.admin_up)
2255
2256     # and now check them
2257     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2258     test = not isinstance(used_minors, (tuple, list))
2259     _ErrorIf(test, self.ENODEDRBD, node,
2260              "cannot parse drbd status file: %s", str(used_minors))
2261     if test:
2262       # we cannot check drbd status
2263       return
2264
2265     for minor, (iname, must_exist) in node_drbd.items():
2266       test = minor not in used_minors and must_exist
2267       _ErrorIf(test, self.ENODEDRBD, node,
2268                "drbd minor %d of instance %s is not active", minor, iname)
2269     for minor in used_minors:
2270       test = minor not in node_drbd
2271       _ErrorIf(test, self.ENODEDRBD, node,
2272                "unallocated drbd minor %d is in use", minor)
2273
2274   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2275     """Builds the node OS structures.
2276
2277     @type ninfo: L{objects.Node}
2278     @param ninfo: the node to check
2279     @param nresult: the remote results for the node
2280     @param nimg: the node image object
2281
2282     """
2283     node = ninfo.name
2284     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2285
2286     remote_os = nresult.get(constants.NV_OSLIST, None)
2287     test = (not isinstance(remote_os, list) or
2288             not compat.all(isinstance(v, list) and len(v) == 7
2289                            for v in remote_os))
2290
2291     _ErrorIf(test, self.ENODEOS, node,
2292              "node hasn't returned valid OS data")
2293
2294     nimg.os_fail = test
2295
2296     if test:
2297       return
2298
2299     os_dict = {}
2300
2301     for (name, os_path, status, diagnose,
2302          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2303
2304       if name not in os_dict:
2305         os_dict[name] = []
2306
2307       # parameters is a list of lists instead of list of tuples due to
2308       # JSON lacking a real tuple type, fix it:
2309       parameters = [tuple(v) for v in parameters]
2310       os_dict[name].append((os_path, status, diagnose,
2311                             set(variants), set(parameters), set(api_ver)))
2312
2313     nimg.oslist = os_dict
2314
2315   def _VerifyNodeOS(self, ninfo, nimg, base):
2316     """Verifies the node OS list.
2317
2318     @type ninfo: L{objects.Node}
2319     @param ninfo: the node to check
2320     @param nimg: the node image object
2321     @param base: the 'template' node we match against (e.g. from the master)
2322
2323     """
2324     node = ninfo.name
2325     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2326
2327     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2328
2329     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2330     for os_name, os_data in nimg.oslist.items():
2331       assert os_data, "Empty OS status for OS %s?!" % os_name
2332       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2333       _ErrorIf(not f_status, self.ENODEOS, node,
2334                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2335       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2336                "OS '%s' has multiple entries (first one shadows the rest): %s",
2337                os_name, utils.CommaJoin([v[0] for v in os_data]))
2338       # comparisons with the 'base' image
2339       test = os_name not in base.oslist
2340       _ErrorIf(test, self.ENODEOS, node,
2341                "Extra OS %s not present on reference node (%s)",
2342                os_name, base.name)
2343       if test:
2344         continue
2345       assert base.oslist[os_name], "Base node has empty OS status?"
2346       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2347       if not b_status:
2348         # base OS is invalid, skipping
2349         continue
2350       for kind, a, b in [("API version", f_api, b_api),
2351                          ("variants list", f_var, b_var),
2352                          ("parameters", beautify_params(f_param),
2353                           beautify_params(b_param))]:
2354         _ErrorIf(a != b, self.ENODEOS, node,
2355                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2356                  kind, os_name, base.name,
2357                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2358
2359     # check any missing OSes
2360     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2361     _ErrorIf(missing, self.ENODEOS, node,
2362              "OSes present on reference node %s but missing on this node: %s",
2363              base.name, utils.CommaJoin(missing))
2364
2365   def _VerifyOob(self, ninfo, nresult):
2366     """Verifies out of band functionality of a node.
2367
2368     @type ninfo: L{objects.Node}
2369     @param ninfo: the node to check
2370     @param nresult: the remote results for the node
2371
2372     """
2373     node = ninfo.name
2374     # We just have to verify the paths on master and/or master candidates
2375     # as the oob helper is invoked on the master
2376     if ((ninfo.master_candidate or ninfo.master_capable) and
2377         constants.NV_OOB_PATHS in nresult):
2378       for path_result in nresult[constants.NV_OOB_PATHS]:
2379         self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2380
2381   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2382     """Verifies and updates the node volume data.
2383
2384     This function will update a L{NodeImage}'s internal structures
2385     with data from the remote call.
2386
2387     @type ninfo: L{objects.Node}
2388     @param ninfo: the node to check
2389     @param nresult: the remote results for the node
2390     @param nimg: the node image object
2391     @param vg_name: the configured VG name
2392
2393     """
2394     node = ninfo.name
2395     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2396
2397     nimg.lvm_fail = True
2398     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2399     if vg_name is None:
2400       pass
2401     elif isinstance(lvdata, basestring):
2402       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2403                utils.SafeEncode(lvdata))
2404     elif not isinstance(lvdata, dict):
2405       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2406     else:
2407       nimg.volumes = lvdata
2408       nimg.lvm_fail = False
2409
2410   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2411     """Verifies and updates the node instance list.
2412
2413     If the listing was successful, then updates this node's instance
2414     list. Otherwise, it marks the RPC call as failed for the instance
2415     list key.
2416
2417     @type ninfo: L{objects.Node}
2418     @param ninfo: the node to check
2419     @param nresult: the remote results for the node
2420     @param nimg: the node image object
2421
2422     """
2423     idata = nresult.get(constants.NV_INSTANCELIST, None)
2424     test = not isinstance(idata, list)
2425     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2426                   " (instancelist): %s", utils.SafeEncode(str(idata)))
2427     if test:
2428       nimg.hyp_fail = True
2429     else:
2430       nimg.instances = idata
2431
2432   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2433     """Verifies and computes a node information map
2434
2435     @type ninfo: L{objects.Node}
2436     @param ninfo: the node to check
2437     @param nresult: the remote results for the node
2438     @param nimg: the node image object
2439     @param vg_name: the configured VG name
2440
2441     """
2442     node = ninfo.name
2443     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2444
2445     # try to read free memory (from the hypervisor)
2446     hv_info = nresult.get(constants.NV_HVINFO, None)
2447     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2448     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2449     if not test:
2450       try:
2451         nimg.mfree = int(hv_info["memory_free"])
2452       except (ValueError, TypeError):
2453         _ErrorIf(True, self.ENODERPC, node,
2454                  "node returned invalid nodeinfo, check hypervisor")
2455
2456     # FIXME: devise a free space model for file based instances as well
2457     if vg_name is not None:
2458       test = (constants.NV_VGLIST not in nresult or
2459               vg_name not in nresult[constants.NV_VGLIST])
2460       _ErrorIf(test, self.ENODELVM, node,
2461                "node didn't return data for the volume group '%s'"
2462                " - it is either missing or broken", vg_name)
2463       if not test:
2464         try:
2465           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2466         except (ValueError, TypeError):
2467           _ErrorIf(True, self.ENODERPC, node,
2468                    "node returned invalid LVM info, check LVM status")
2469
2470   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2471     """Gets per-disk status information for all instances.
2472
2473     @type nodelist: list of strings
2474     @param nodelist: Node names
2475     @type node_image: dict of (name, L{objects.Node})
2476     @param node_image: Node objects
2477     @type instanceinfo: dict of (name, L{objects.Instance})
2478     @param instanceinfo: Instance objects
2479     @rtype: {instance: {node: [(succes, payload)]}}
2480     @return: a dictionary of per-instance dictionaries with nodes as
2481         keys and disk information as values; the disk information is a
2482         list of tuples (success, payload)
2483
2484     """
2485     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2486
2487     node_disks = {}
2488     node_disks_devonly = {}
2489     diskless_instances = set()
2490     diskless = constants.DT_DISKLESS
2491
2492     for nname in nodelist:
2493       node_instances = list(itertools.chain(node_image[nname].pinst,
2494                                             node_image[nname].sinst))
2495       diskless_instances.update(inst for inst in node_instances
2496                                 if instanceinfo[inst].disk_template == diskless)
2497       disks = [(inst, disk)
2498                for inst in node_instances
2499                for disk in instanceinfo[inst].disks]
2500
2501       if not disks:
2502         # No need to collect data
2503         continue
2504
2505       node_disks[nname] = disks
2506
2507       # Creating copies as SetDiskID below will modify the objects and that can
2508       # lead to incorrect data returned from nodes
2509       devonly = [dev.Copy() for (_, dev) in disks]
2510
2511       for dev in devonly:
2512         self.cfg.SetDiskID(dev, nname)
2513
2514       node_disks_devonly[nname] = devonly
2515
2516     assert len(node_disks) == len(node_disks_devonly)
2517
2518     # Collect data from all nodes with disks
2519     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2520                                                           node_disks_devonly)
2521
2522     assert len(result) == len(node_disks)
2523
2524     instdisk = {}
2525
2526     for (nname, nres) in result.items():
2527       disks = node_disks[nname]
2528
2529       if nres.offline:
2530         # No data from this node
2531         data = len(disks) * [(False, "node offline")]
2532       else:
2533         msg = nres.fail_msg
2534         _ErrorIf(msg, self.ENODERPC, nname,
2535                  "while getting disk information: %s", msg)
2536         if msg:
2537           # No data from this node
2538           data = len(disks) * [(False, msg)]
2539         else:
2540           data = []
2541           for idx, i in enumerate(nres.payload):
2542             if isinstance(i, (tuple, list)) and len(i) == 2:
2543               data.append(i)
2544             else:
2545               logging.warning("Invalid result from node %s, entry %d: %s",
2546                               nname, idx, i)
2547               data.append((False, "Invalid result from the remote node"))
2548
2549       for ((inst, _), status) in zip(disks, data):
2550         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2551
2552     # Add empty entries for diskless instances.
2553     for inst in diskless_instances:
2554       assert inst not in instdisk
2555       instdisk[inst] = {}
2556
2557     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2558                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2559                       compat.all(isinstance(s, (tuple, list)) and
2560                                  len(s) == 2 for s in statuses)
2561                       for inst, nnames in instdisk.items()
2562                       for nname, statuses in nnames.items())
2563     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2564
2565     return instdisk
2566
2567   @staticmethod
2568   def _SshNodeSelector(group_uuid, all_nodes):
2569     """Create endless iterators for all potential SSH check hosts.
2570
2571     """
2572     nodes = [node for node in all_nodes
2573              if (node.group != group_uuid and
2574                  not node.offline)]
2575     keyfunc = operator.attrgetter("group")
2576
2577     return map(itertools.cycle,
2578                [sorted(map(operator.attrgetter("name"), names))
2579                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2580                                                   keyfunc)])
2581
2582   @classmethod
2583   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2584     """Choose which nodes should talk to which other nodes.
2585
2586     We will make nodes contact all nodes in their group, and one node from
2587     every other group.
2588
2589     @warning: This algorithm has a known issue if one node group is much
2590       smaller than others (e.g. just one node). In such a case all other
2591       nodes will talk to the single node.
2592
2593     """
2594     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2595     sel = cls._SshNodeSelector(group_uuid, all_nodes)
2596
2597     return (online_nodes,
2598             dict((name, sorted([i.next() for i in sel]))
2599                  for name in online_nodes))
2600
2601   def BuildHooksEnv(self):
2602     """Build hooks env.
2603
2604     Cluster-Verify hooks just ran in the post phase and their failure makes
2605     the output be logged in the verify output and the verification to fail.
2606
2607     """
2608     env = {
2609       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2610       }
2611
2612     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2613                for node in self.my_node_info.values())
2614
2615     return env
2616
2617   def BuildHooksNodes(self):
2618     """Build hooks nodes.
2619
2620     """
2621     return ([], self.my_node_names)
2622
2623   def Exec(self, feedback_fn):
2624     """Verify integrity of the node group, performing various test on nodes.
2625
2626     """
2627     # This method has too many local variables. pylint: disable=R0914
2628     feedback_fn("* Verifying group '%s'" % self.group_info.name)
2629
2630     if not self.my_node_names:
2631       # empty node group
2632       feedback_fn("* Empty node group, skipping verification")
2633       return True
2634
2635     self.bad = False
2636     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2637     verbose = self.op.verbose
2638     self._feedback_fn = feedback_fn
2639
2640     vg_name = self.cfg.GetVGName()
2641     drbd_helper = self.cfg.GetDRBDHelper()
2642     cluster = self.cfg.GetClusterInfo()
2643     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2644     hypervisors = cluster.enabled_hypervisors
2645     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2646
2647     i_non_redundant = [] # Non redundant instances
2648     i_non_a_balanced = [] # Non auto-balanced instances
2649     n_offline = 0 # Count of offline nodes
2650     n_drained = 0 # Count of nodes being drained
2651     node_vol_should = {}
2652
2653     # FIXME: verify OS list
2654
2655     # File verification
2656     filemap = _ComputeAncillaryFiles(cluster, False)
2657
2658     # do local checksums
2659     master_node = self.master_node = self.cfg.GetMasterNode()
2660     master_ip = self.cfg.GetMasterIP()
2661
2662     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2663
2664     node_verify_param = {
2665       constants.NV_FILELIST:
2666         utils.UniqueSequence(filename
2667                              for files in filemap
2668                              for filename in files),
2669       constants.NV_NODELIST:
2670         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2671                                   self.all_node_info.values()),
2672       constants.NV_HYPERVISOR: hypervisors,
2673       constants.NV_HVPARAMS:
2674         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2675       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2676                                  for node in node_data_list
2677                                  if not node.offline],
2678       constants.NV_INSTANCELIST: hypervisors,
2679       constants.NV_VERSION: None,
2680       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2681       constants.NV_NODESETUP: None,
2682       constants.NV_TIME: None,
2683       constants.NV_MASTERIP: (master_node, master_ip),
2684       constants.NV_OSLIST: None,
2685       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2686       }
2687
2688     if vg_name is not None:
2689       node_verify_param[constants.NV_VGLIST] = None
2690       node_verify_param[constants.NV_LVLIST] = vg_name
2691       node_verify_param[constants.NV_PVLIST] = [vg_name]
2692       node_verify_param[constants.NV_DRBDLIST] = None
2693
2694     if drbd_helper:
2695       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2696
2697     # bridge checks
2698     # FIXME: this needs to be changed per node-group, not cluster-wide
2699     bridges = set()
2700     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2701     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2702       bridges.add(default_nicpp[constants.NIC_LINK])
2703     for instance in self.my_inst_info.values():
2704       for nic in instance.nics:
2705         full_nic = cluster.SimpleFillNIC(nic.nicparams)
2706         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2707           bridges.add(full_nic[constants.NIC_LINK])
2708
2709     if bridges:
2710       node_verify_param[constants.NV_BRIDGES] = list(bridges)
2711
2712     # Build our expected cluster state
2713     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2714                                                  name=node.name,
2715                                                  vm_capable=node.vm_capable))
2716                       for node in node_data_list)
2717
2718     # Gather OOB paths
2719     oob_paths = []
2720     for node in self.all_node_info.values():
2721       path = _SupportsOob(self.cfg, node)
2722       if path and path not in oob_paths:
2723         oob_paths.append(path)
2724
2725     if oob_paths:
2726       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2727
2728     for instance in self.my_inst_names:
2729       inst_config = self.my_inst_info[instance]
2730
2731       for nname in inst_config.all_nodes:
2732         if nname not in node_image:
2733           gnode = self.NodeImage(name=nname)
2734           gnode.ghost = (nname not in self.all_node_info)
2735           node_image[nname] = gnode
2736
2737       inst_config.MapLVsByNode(node_vol_should)
2738
2739       pnode = inst_config.primary_node
2740       node_image[pnode].pinst.append(instance)
2741
2742       for snode in inst_config.secondary_nodes:
2743         nimg = node_image[snode]
2744         nimg.sinst.append(instance)
2745         if pnode not in nimg.sbp:
2746           nimg.sbp[pnode] = []
2747         nimg.sbp[pnode].append(instance)
2748
2749     # At this point, we have the in-memory data structures complete,
2750     # except for the runtime information, which we'll gather next
2751
2752     # Due to the way our RPC system works, exact response times cannot be
2753     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2754     # time before and after executing the request, we can at least have a time
2755     # window.
2756     nvinfo_starttime = time.time()
2757     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2758                                            node_verify_param,
2759                                            self.cfg.GetClusterName())
2760     nvinfo_endtime = time.time()
2761
2762     if self.extra_lv_nodes and vg_name is not None:
2763       extra_lv_nvinfo = \
2764           self.rpc.call_node_verify(self.extra_lv_nodes,
2765                                     {constants.NV_LVLIST: vg_name},
2766                                     self.cfg.GetClusterName())
2767     else:
2768       extra_lv_nvinfo = {}
2769
2770     all_drbd_map = self.cfg.ComputeDRBDMap()
2771
2772     feedback_fn("* Gathering disk information (%s nodes)" %
2773                 len(self.my_node_names))
2774     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2775                                      self.my_inst_info)
2776
2777     feedback_fn("* Verifying configuration file consistency")
2778
2779     # If not all nodes are being checked, we need to make sure the master node
2780     # and a non-checked vm_capable node are in the list.
2781     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2782     if absent_nodes:
2783       vf_nvinfo = all_nvinfo.copy()
2784       vf_node_info = list(self.my_node_info.values())
2785       additional_nodes = []
2786       if master_node not in self.my_node_info:
2787         additional_nodes.append(master_node)
2788         vf_node_info.append(self.all_node_info[master_node])
2789       # Add the first vm_capable node we find which is not included
2790       for node in absent_nodes:
2791         nodeinfo = self.all_node_info[node]
2792         if nodeinfo.vm_capable and not nodeinfo.offline:
2793           additional_nodes.append(node)
2794           vf_node_info.append(self.all_node_info[node])
2795           break
2796       key = constants.NV_FILELIST
2797       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2798                                                  {key: node_verify_param[key]},
2799                                                  self.cfg.GetClusterName()))
2800     else:
2801       vf_nvinfo = all_nvinfo
2802       vf_node_info = self.my_node_info.values()
2803
2804     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2805
2806     feedback_fn("* Verifying node status")
2807
2808     refos_img = None
2809
2810     for node_i in node_data_list:
2811       node = node_i.name
2812       nimg = node_image[node]
2813
2814       if node_i.offline:
2815         if verbose:
2816           feedback_fn("* Skipping offline node %s" % (node,))
2817         n_offline += 1
2818         continue
2819
2820       if node == master_node:
2821         ntype = "master"
2822       elif node_i.master_candidate:
2823         ntype = "master candidate"
2824       elif node_i.drained:
2825         ntype = "drained"
2826         n_drained += 1
2827       else:
2828         ntype = "regular"
2829       if verbose:
2830         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2831
2832       msg = all_nvinfo[node].fail_msg
2833       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2834       if msg:
2835         nimg.rpc_fail = True
2836         continue
2837
2838       nresult = all_nvinfo[node].payload
2839
2840       nimg.call_ok = self._VerifyNode(node_i, nresult)
2841       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2842       self._VerifyNodeNetwork(node_i, nresult)
2843       self._VerifyOob(node_i, nresult)
2844
2845       if nimg.vm_capable:
2846         self._VerifyNodeLVM(node_i, nresult, vg_name)
2847         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2848                              all_drbd_map)
2849
2850         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2851         self._UpdateNodeInstances(node_i, nresult, nimg)
2852         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2853         self._UpdateNodeOS(node_i, nresult, nimg)
2854
2855         if not nimg.os_fail:
2856           if refos_img is None:
2857             refos_img = nimg
2858           self._VerifyNodeOS(node_i, nimg, refos_img)
2859         self._VerifyNodeBridges(node_i, nresult, bridges)
2860
2861         # Check whether all running instancies are primary for the node. (This
2862         # can no longer be done from _VerifyInstance below, since some of the
2863         # wrong instances could be from other node groups.)
2864         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2865
2866         for inst in non_primary_inst:
2867           test = inst in self.all_inst_info
2868           _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2869                    "instance should not run on node %s", node_i.name)
2870           _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2871                    "node is running unknown instance %s", inst)
2872
2873     for node, result in extra_lv_nvinfo.items():
2874       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2875                               node_image[node], vg_name)
2876
2877     feedback_fn("* Verifying instance status")
2878     for instance in self.my_inst_names:
2879       if verbose:
2880         feedback_fn("* Verifying instance %s" % instance)
2881       inst_config = self.my_inst_info[instance]
2882       self._VerifyInstance(instance, inst_config, node_image,
2883                            instdisk[instance])
2884       inst_nodes_offline = []
2885
2886       pnode = inst_config.primary_node
2887       pnode_img = node_image[pnode]
2888       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2889                self.ENODERPC, pnode, "instance %s, connection to"
2890                " primary node failed", instance)
2891
2892       _ErrorIf(inst_config.admin_up and pnode_img.offline,
2893                self.EINSTANCEBADNODE, instance,
2894                "instance is marked as running and lives on offline node %s",
2895                inst_config.primary_node)
2896
2897       # If the instance is non-redundant we cannot survive losing its primary
2898       # node, so we are not N+1 compliant. On the other hand we have no disk
2899       # templates with more than one secondary so that situation is not well
2900       # supported either.
2901       # FIXME: does not support file-backed instances
2902       if not inst_config.secondary_nodes:
2903         i_non_redundant.append(instance)
2904
2905       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2906                instance, "instance has multiple secondary nodes: %s",
2907                utils.CommaJoin(inst_config.secondary_nodes),
2908                code=self.ETYPE_WARNING)
2909
2910       if inst_config.disk_template in constants.DTS_INT_MIRROR:
2911         pnode = inst_config.primary_node
2912         instance_nodes = utils.NiceSort(inst_config.all_nodes)
2913         instance_groups = {}
2914
2915         for node in instance_nodes:
2916           instance_groups.setdefault(self.all_node_info[node].group,
2917                                      []).append(node)
2918
2919         pretty_list = [
2920           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2921           # Sort so that we always list the primary node first.
2922           for group, nodes in sorted(instance_groups.items(),
2923                                      key=lambda (_, nodes): pnode in nodes,
2924                                      reverse=True)]
2925
2926         self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2927                       instance, "instance has primary and secondary nodes in"
2928                       " different groups: %s", utils.CommaJoin(pretty_list),
2929                       code=self.ETYPE_WARNING)
2930
2931       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2932         i_non_a_balanced.append(instance)
2933
2934       for snode in inst_config.secondary_nodes:
2935         s_img = node_image[snode]
2936         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2937                  "instance %s, connection to secondary node failed", instance)
2938
2939         if s_img.offline:
2940           inst_nodes_offline.append(snode)
2941
2942       # warn that the instance lives on offline nodes
2943       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2944                "instance has offline secondary node(s) %s",
2945                utils.CommaJoin(inst_nodes_offline))
2946       # ... or ghost/non-vm_capable nodes
2947       for node in inst_config.all_nodes:
2948         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2949                  "instance lives on ghost node %s", node)
2950         _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2951                  instance, "instance lives on non-vm_capable node %s", node)
2952
2953     feedback_fn("* Verifying orphan volumes")
2954     reserved = utils.FieldSet(*cluster.reserved_lvs)
2955
2956     # We will get spurious "unknown volume" warnings if any node of this group
2957     # is secondary for an instance whose primary is in another group. To avoid
2958     # them, we find these instances and add their volumes to node_vol_should.
2959     for inst in self.all_inst_info.values():
2960       for secondary in inst.secondary_nodes:
2961         if (secondary in self.my_node_info
2962             and inst.name not in self.my_inst_info):
2963           inst.MapLVsByNode(node_vol_should)
2964           break
2965
2966     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2967
2968     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2969       feedback_fn("* Verifying N+1 Memory redundancy")
2970       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2971
2972     feedback_fn("* Other Notes")
2973     if i_non_redundant:
2974       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2975                   % len(i_non_redundant))
2976
2977     if i_non_a_balanced:
2978       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2979                   % len(i_non_a_balanced))
2980
2981     if n_offline:
2982       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2983
2984     if n_drained:
2985       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2986
2987     return not self.bad
2988
2989   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2990     """Analyze the post-hooks' result
2991
2992     This method analyses the hook result, handles it, and sends some
2993     nicely-formatted feedback back to the user.
2994
2995     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2996         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2997     @param hooks_results: the results of the multi-node hooks rpc call
2998     @param feedback_fn: function used send feedback back to the caller
2999     @param lu_result: previous Exec result
3000     @return: the new Exec result, based on the previous result
3001         and hook results
3002
3003     """
3004     # We only really run POST phase hooks, only for non-empty groups,
3005     # and are only interested in their results
3006     if not self.my_node_names:
3007       # empty node group
3008       pass
3009     elif phase == constants.HOOKS_PHASE_POST:
3010       # Used to change hooks' output to proper indentation
3011       feedback_fn("* Hooks Results")
3012       assert hooks_results, "invalid result from hooks"
3013
3014       for node_name in hooks_results:
3015         res = hooks_results[node_name]
3016         msg = res.fail_msg
3017         test = msg and not res.offline
3018         self._ErrorIf(test, self.ENODEHOOKS, node_name,
3019                       "Communication failure in hooks execution: %s", msg)
3020         if res.offline or msg:
3021           # No need to investigate payload if node is offline or gave
3022           # an error.
3023           continue
3024         for script, hkr, output in res.payload:
3025           test = hkr == constants.HKR_FAIL
3026           self._ErrorIf(test, self.ENODEHOOKS, node_name,
3027                         "Script %s failed, output:", script)
3028           if test:
3029             output = self._HOOKS_INDENT_RE.sub("      ", output)
3030             feedback_fn("%s" % output)
3031             lu_result = False
3032
3033     return lu_result
3034
3035
3036 class LUClusterVerifyDisks(NoHooksLU):
3037   """Verifies the cluster disks status.
3038
3039   """
3040   REQ_BGL = False
3041
3042   def ExpandNames(self):
3043     self.share_locks = _ShareAll()
3044     self.needed_locks = {
3045       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3046       }
3047
3048   def Exec(self, feedback_fn):
3049     group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3050
3051     # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3052     return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3053                            for group in group_names])
3054
3055
3056 class LUGroupVerifyDisks(NoHooksLU):
3057   """Verifies the status of all disks in a node group.
3058
3059   """
3060   REQ_BGL = False
3061
3062   def ExpandNames(self):
3063     # Raises errors.OpPrereqError on its own if group can't be found
3064     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3065
3066     self.share_locks = _ShareAll()
3067     self.needed_locks = {
3068       locking.LEVEL_INSTANCE: [],
3069       locking.LEVEL_NODEGROUP: [],
3070       locking.LEVEL_NODE: [],
3071       }
3072
3073   def DeclareLocks(self, level):
3074     if level == locking.LEVEL_INSTANCE:
3075       assert not self.needed_locks[locking.LEVEL_INSTANCE]
3076
3077       # Lock instances optimistically, needs verification once node and group
3078       # locks have been acquired
3079       self.needed_locks[locking.LEVEL_INSTANCE] = \
3080         self.cfg.GetNodeGroupInstances(self.group_uuid)
3081
3082     elif level == locking.LEVEL_NODEGROUP:
3083       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3084
3085       self.needed_locks[locking.LEVEL_NODEGROUP] = \
3086         set([self.group_uuid] +
3087             # Lock all groups used by instances optimistically; this requires
3088             # going via the node before it's locked, requiring verification
3089             # later on
3090             [group_uuid
3091              for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3092              for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3093
3094     elif level == locking.LEVEL_NODE:
3095       # This will only lock the nodes in the group to be verified which contain
3096       # actual instances
3097       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3098       self._LockInstancesNodes()
3099
3100       # Lock all nodes in group to be verified
3101       assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3102       member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3103       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3104
3105   def CheckPrereq(self):
3106     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3107     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3108     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3109
3110     assert self.group_uuid in owned_groups
3111
3112     # Check if locked instances are still correct
3113     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3114
3115     # Get instance information
3116     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3117
3118     # Check if node groups for locked instances are still correct
3119     for (instance_name, inst) in self.instances.items():
3120       assert owned_nodes.issuperset(inst.all_nodes), \
3121         "Instance %s's nodes changed while we kept the lock" % instance_name
3122
3123       inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3124                                              owned_groups)
3125
3126       assert self.group_uuid in inst_groups, \
3127         "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3128
3129   def Exec(self, feedback_fn):
3130     """Verify integrity of cluster disks.
3131
3132     @rtype: tuple of three items
3133     @return: a tuple of (dict of node-to-node_error, list of instances
3134         which need activate-disks, dict of instance: (node, volume) for
3135         missing volumes
3136
3137     """
3138     res_nodes = {}
3139     res_instances = set()
3140     res_missing = {}
3141
3142     nv_dict = _MapInstanceDisksToNodes([inst
3143                                         for inst in self.instances.values()
3144                                         if inst.admin_up])
3145
3146     if nv_dict:
3147       nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3148                              set(self.cfg.GetVmCapableNodeList()))
3149
3150       node_lvs = self.rpc.call_lv_list(nodes, [])
3151
3152       for (node, node_res) in node_lvs.items():
3153         if node_res.offline:
3154           continue
3155
3156         msg = node_res.fail_msg
3157         if msg:
3158           logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3159           res_nodes[node] = msg
3160           continue
3161
3162         for lv_name, (_, _, lv_online) in node_res.payload.items():
3163           inst = nv_dict.pop((node, lv_name), None)
3164           if not (lv_online or inst is None):
3165             res_instances.add(inst)
3166
3167       # any leftover items in nv_dict are missing LVs, let's arrange the data
3168       # better
3169       for key, inst in nv_dict.iteritems():
3170         res_missing.setdefault(inst, []).append(list(key))
3171
3172     return (res_nodes, list(res_instances), res_missing)
3173
3174
3175 class LUClusterRepairDiskSizes(NoHooksLU):
3176   """Verifies the cluster disks sizes.
3177
3178   """
3179   REQ_BGL = False
3180
3181   def ExpandNames(self):
3182     if self.op.instances:
3183       self.wanted_names = _GetWantedInstances(self, self.op.instances)
3184       self.needed_locks = {
3185         locking.LEVEL_NODE: [],
3186         locking.LEVEL_INSTANCE: self.wanted_names,
3187         }
3188       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3189     else:
3190       self.wanted_names = None
3191       self.needed_locks = {
3192         locking.LEVEL_NODE: locking.ALL_SET,
3193         locking.LEVEL_INSTANCE: locking.ALL_SET,
3194         }
3195     self.share_locks = {
3196       locking.LEVEL_NODE: 1,
3197       locking.LEVEL_INSTANCE: 0,
3198       }
3199
3200   def DeclareLocks(self, level):
3201     if level == locking.LEVEL_NODE and self.wanted_names is not None:
3202       self._LockInstancesNodes(primary_only=True)
3203
3204   def CheckPrereq(self):
3205     """Check prerequisites.
3206
3207     This only checks the optional instance list against the existing names.
3208
3209     """
3210     if self.wanted_names is None:
3211       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3212
3213     self.wanted_instances = \
3214         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3215
3216   def _EnsureChildSizes(self, disk):
3217     """Ensure children of the disk have the needed disk size.
3218
3219     This is valid mainly for DRBD8 and fixes an issue where the
3220     children have smaller disk size.
3221
3222     @param disk: an L{ganeti.objects.Disk} object
3223
3224     """
3225     if disk.dev_type == constants.LD_DRBD8:
3226       assert disk.children, "Empty children for DRBD8?"
3227       fchild = disk.children[0]
3228       mismatch = fchild.size < disk.size
3229       if mismatch:
3230         self.LogInfo("Child disk has size %d, parent %d, fixing",
3231                      fchild.size, disk.size)
3232         fchild.size = disk.size
3233
3234       # and we recurse on this child only, not on the metadev
3235       return self._EnsureChildSizes(fchild) or mismatch
3236     else:
3237       return False
3238
3239   def Exec(self, feedback_fn):
3240     """Verify the size of cluster disks.
3241
3242     """
3243     # TODO: check child disks too
3244     # TODO: check differences in size between primary/secondary nodes
3245     per_node_disks = {}
3246     for instance in self.wanted_instances:
3247       pnode = instance.primary_node
3248       if pnode not in per_node_disks:
3249         per_node_disks[pnode] = []
3250       for idx, disk in enumerate(instance.disks):
3251         per_node_disks[pnode].append((instance, idx, disk))
3252
3253     changed = []
3254     for node, dskl in per_node_disks.items():
3255       newl = [v[2].Copy() for v in dskl]
3256       for dsk in newl:
3257         self.cfg.SetDiskID(dsk, node)
3258       result = self.rpc.call_blockdev_getsize(node, newl)
3259       if result.fail_msg:
3260         self.LogWarning("Failure in blockdev_getsize call to node"
3261                         " %s, ignoring", node)
3262         continue
3263       if len(result.payload) != len(dskl):
3264         logging.warning("Invalid result from node %s: len(dksl)=%d,"
3265                         " result.payload=%s", node, len(dskl), result.payload)
3266         self.LogWarning("Invalid result from node %s, ignoring node results",
3267                         node)
3268         continue
3269       for ((instance, idx, disk), size) in zip(dskl, result.payload):
3270         if size is None:
3271           self.LogWarning("Disk %d of instance %s did not return size"
3272                           " information, ignoring", idx, instance.name)
3273           continue
3274         if not isinstance(size, (int, long)):
3275           self.LogWarning("Disk %d of instance %s did not return valid"
3276                           " size information, ignoring", idx, instance.name)
3277           continue
3278         size = size >> 20
3279         if size != disk.size:
3280           self.LogInfo("Disk %d of instance %s has mismatched size,"
3281                        " correcting: recorded %d, actual %d", idx,
3282                        instance.name, disk.size, size)
3283           disk.size = size
3284           self.cfg.Update(instance, feedback_fn)
3285           changed.append((instance.name, idx, size))
3286         if self._EnsureChildSizes(disk):
3287           self.cfg.Update(instance, feedback_fn)
3288           changed.append((instance.name, idx, disk.size))
3289     return changed
3290
3291
3292 class LUClusterRename(LogicalUnit):
3293   """Rename the cluster.
3294
3295   """
3296   HPATH = "cluster-rename"
3297   HTYPE = constants.HTYPE_CLUSTER
3298
3299   def BuildHooksEnv(self):
3300     """Build hooks env.
3301
3302     """
3303     return {
3304       "OP_TARGET": self.cfg.GetClusterName(),
3305       "NEW_NAME": self.op.name,
3306       }
3307
3308   def BuildHooksNodes(self):
3309     """Build hooks nodes.
3310
3311     """
3312     return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3313
3314   def CheckPrereq(self):
3315     """Verify that the passed name is a valid one.
3316
3317     """
3318     hostname = netutils.GetHostname(name=self.op.name,
3319                                     family=self.cfg.GetPrimaryIPFamily())
3320
3321     new_name = hostname.name
3322     self.ip = new_ip = hostname.ip
3323     old_name = self.cfg.GetClusterName()
3324     old_ip = self.cfg.GetMasterIP()
3325     if new_name == old_name and new_ip == old_ip:
3326       raise errors.OpPrereqError("Neither the name nor the IP address of the"
3327                                  " cluster has changed",
3328                                  errors.ECODE_INVAL)
3329     if new_ip != old_ip:
3330       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3331         raise errors.OpPrereqError("The given cluster IP address (%s) is"
3332                                    " reachable on the network" %
3333                                    new_ip, errors.ECODE_NOTUNIQUE)
3334
3335     self.op.name = new_name
3336
3337   def Exec(self, feedback_fn):
3338     """Rename the cluster.
3339
3340     """
3341     clustername = self.op.name
3342     ip = self.ip
3343
3344     # shutdown the master IP
3345     master = self.cfg.GetMasterNode()
3346     result = self.rpc.call_node_stop_master(master, False)
3347     result.Raise("Could not disable the master role")
3348
3349     try:
3350       cluster = self.cfg.GetClusterInfo()
3351       cluster.cluster_name = clustername
3352       cluster.master_ip = ip
3353       self.cfg.Update(cluster, feedback_fn)
3354
3355       # update the known hosts file
3356       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3357       node_list = self.cfg.GetOnlineNodeList()
3358       try:
3359         node_list.remove(master)
3360       except ValueError:
3361         pass
3362       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3363     finally:
3364       result = self.rpc.call_node_start_master(master, False, False)
3365       msg = result.fail_msg
3366       if msg:
3367         self.LogWarning("Could not re-enable the master role on"
3368                         " the master, please restart manually: %s", msg)
3369
3370     return clustername
3371
3372
3373 class LUClusterSetParams(LogicalUnit):
3374   """Change the parameters of the cluster.
3375
3376   """
3377   HPATH = "cluster-modify"
3378   HTYPE = constants.HTYPE_CLUSTER
3379   REQ_BGL = False
3380
3381   def CheckArguments(self):
3382     """Check parameters
3383
3384     """
3385     if self.op.uid_pool:
3386       uidpool.CheckUidPool(self.op.uid_pool)
3387
3388     if self.op.add_uids:
3389       uidpool.CheckUidPool(self.op.add_uids)
3390
3391     if self.op.remove_uids:
3392       uidpool.CheckUidPool(self.op.remove_uids)
3393
3394   def ExpandNames(self):
3395     # FIXME: in the future maybe other cluster params won't require checking on
3396     # all nodes to be modified.
3397     self.needed_locks = {
3398       locking.LEVEL_NODE: locking.ALL_SET,
3399     }
3400     self.share_locks[locking.LEVEL_NODE] = 1
3401
3402   def BuildHooksEnv(self):
3403     """Build hooks env.
3404
3405     """
3406     return {
3407       "OP_TARGET": self.cfg.GetClusterName(),
3408       "NEW_VG_NAME": self.op.vg_name,
3409       }
3410
3411   def BuildHooksNodes(self):
3412     """Build hooks nodes.
3413
3414     """
3415     mn = self.cfg.GetMasterNode()
3416     return ([mn], [mn])
3417
3418   def CheckPrereq(self):
3419     """Check prerequisites.
3420
3421     This checks whether the given params don't conflict and
3422     if the given volume group is valid.
3423
3424     """
3425     if self.op.vg_name is not None and not self.op.vg_name:
3426       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3427         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3428                                    " instances exist", errors.ECODE_INVAL)
3429
3430     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3431       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3432         raise errors.OpPrereqError("Cannot disable drbd helper while"
3433                                    " drbd-based instances exist",
3434                                    errors.ECODE_INVAL)
3435
3436     node_list = self.owned_locks(locking.LEVEL_NODE)
3437
3438     # if vg_name not None, checks given volume group on all nodes
3439     if self.op.vg_name:
3440       vglist = self.rpc.call_vg_list(node_list)
3441       for node in node_list:
3442         msg = vglist[node].fail_msg
3443         if msg:
3444           # ignoring down node
3445           self.LogWarning("Error while gathering data on node %s"
3446                           " (ignoring node): %s", node, msg)
3447           continue
3448         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3449                                               self.op.vg_name,
3450                                               constants.MIN_VG_SIZE)
3451         if vgstatus:
3452           raise errors.OpPrereqError("Error on node '%s': %s" %
3453                                      (node, vgstatus), errors.ECODE_ENVIRON)
3454
3455     if self.op.drbd_helper:
3456       # checks given drbd helper on all nodes
3457       helpers = self.rpc.call_drbd_helper(node_list)
3458       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3459         if ninfo.offline:
3460           self.LogInfo("Not checking drbd helper on offline node %s", node)
3461           continue
3462         msg = helpers[node].fail_msg
3463         if msg:
3464           raise errors.OpPrereqError("Error checking drbd helper on node"
3465                                      " '%s': %s" % (node, msg),
3466                                      errors.ECODE_ENVIRON)
3467         node_helper = helpers[node].payload
3468         if node_helper != self.op.drbd_helper:
3469           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3470                                      (node, node_helper), errors.ECODE_ENVIRON)
3471
3472     self.cluster = cluster = self.cfg.GetClusterInfo()
3473     # validate params changes
3474     if self.op.beparams:
3475       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3476       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3477
3478     if self.op.ndparams:
3479       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3480       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3481
3482       # TODO: we need a more general way to handle resetting
3483       # cluster-level parameters to default values
3484       if self.new_ndparams["oob_program"] == "":
3485         self.new_ndparams["oob_program"] = \
3486             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3487
3488     if self.op.nicparams:
3489       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3490       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3491       objects.NIC.CheckParameterSyntax(self.new_nicparams)
3492       nic_errors = []
3493
3494       # check all instances for consistency
3495       for instance in self.cfg.GetAllInstancesInfo().values():
3496         for nic_idx, nic in enumerate(instance.nics):
3497           params_copy = copy.deepcopy(nic.nicparams)
3498           params_filled = objects.FillDict(self.new_nicparams, params_copy)
3499
3500           # check parameter syntax
3501           try:
3502             objects.NIC.CheckParameterSyntax(params_filled)
3503           except errors.ConfigurationError, err:
3504             nic_errors.append("Instance %s, nic/%d: %s" %
3505                               (instance.name, nic_idx, err))
3506
3507           # if we're moving instances to routed, check that they have an ip
3508           target_mode = params_filled[constants.NIC_MODE]
3509           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3510             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3511                               " address" % (instance.name, nic_idx))
3512       if nic_errors:
3513         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3514                                    "\n".join(nic_errors))
3515
3516     # hypervisor list/parameters
3517     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3518     if self.op.hvparams:
3519       for hv_name, hv_dict in self.op.hvparams.items():
3520         if hv_name not in self.new_hvparams:
3521           self.new_hvparams[hv_name] = hv_dict
3522         else:
3523           self.new_hvparams[hv_name].update(hv_dict)
3524
3525     # os hypervisor parameters
3526     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3527     if self.op.os_hvp:
3528       for os_name, hvs in self.op.os_hvp.items():
3529         if os_name not in self.new_os_hvp:
3530           self.new_os_hvp[os_name] = hvs
3531         else:
3532           for hv_name, hv_dict in hvs.items():
3533             if hv_name not in self.new_os_hvp[os_name]:
3534               self.new_os_hvp[os_name][hv_name] = hv_dict
3535             else:
3536               self.new_os_hvp[os_name][hv_name].update(hv_dict)
3537
3538     # os parameters
3539     self.new_osp = objects.FillDict(cluster.osparams, {})
3540     if self.op.osparams:
3541       for os_name, osp in self.op.osparams.items():
3542         if os_name not in self.new_osp:
3543           self.new_osp[os_name] = {}
3544
3545         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3546                                                   use_none=True)
3547
3548         if not self.new_osp[os_name]:
3549           # we removed all parameters
3550           del self.new_osp[os_name]
3551         else:
3552           # check the parameter validity (remote check)
3553           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3554                          os_name, self.new_osp[os_name])
3555
3556     # changes to the hypervisor list
3557     if self.op.enabled_hypervisors is not None:
3558       self.hv_list = self.op.enabled_hypervisors
3559       for hv in self.hv_list:
3560         # if the hypervisor doesn't already exist in the cluster
3561         # hvparams, we initialize it to empty, and then (in both
3562         # cases) we make sure to fill the defaults, as we might not
3563         # have a complete defaults list if the hypervisor wasn't
3564         # enabled before
3565         if hv not in new_hvp:
3566           new_hvp[hv] = {}
3567         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3568         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3569     else:
3570       self.hv_list = cluster.enabled_hypervisors
3571
3572     if self.op.hvparams or self.op.enabled_hypervisors is not None:
3573       # either the enabled list has changed, or the parameters have, validate
3574       for hv_name, hv_params in self.new_hvparams.items():
3575         if ((self.op.hvparams and hv_name in self.op.hvparams) or
3576             (self.op.enabled_hypervisors and
3577              hv_name in self.op.enabled_hypervisors)):
3578           # either this is a new hypervisor, or its parameters have changed
3579           hv_class = hypervisor.GetHypervisor(hv_name)
3580           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3581           hv_class.CheckParameterSyntax(hv_params)
3582           _CheckHVParams(self, node_list, hv_name, hv_params)
3583
3584     if self.op.os_hvp:
3585       # no need to check any newly-enabled hypervisors, since the
3586       # defaults have already been checked in the above code-block
3587       for os_name, os_hvp in self.new_os_hvp.items():
3588         for hv_name, hv_params in os_hvp.items():
3589           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3590           # we need to fill in the new os_hvp on top of the actual hv_p
3591           cluster_defaults = self.new_hvparams.get(hv_name, {})
3592           new_osp = objects.FillDict(cluster_defaults, hv_params)
3593           hv_class = hypervisor.GetHypervisor(hv_name)
3594           hv_class.CheckParameterSyntax(new_osp)
3595           _CheckHVParams(self, node_list, hv_name, new_osp)
3596
3597     if self.op.default_iallocator:
3598       alloc_script = utils.FindFile(self.op.default_iallocator,
3599                                     constants.IALLOCATOR_SEARCH_PATH,
3600                                     os.path.isfile)
3601       if alloc_script is None:
3602         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3603                                    " specified" % self.op.default_iallocator,
3604                                    errors.ECODE_INVAL)
3605
3606   def Exec(self, feedback_fn):
3607     """Change the parameters of the cluster.
3608
3609     """
3610     if self.op.vg_name is not None:
3611       new_volume = self.op.vg_name
3612       if not new_volume:
3613         new_volume = None
3614       if new_volume != self.cfg.GetVGName():
3615         self.cfg.SetVGName(new_volume)
3616       else:
3617         feedback_fn("Cluster LVM configuration already in desired"
3618                     " state, not changing")
3619     if self.op.drbd_helper is not None:
3620       new_helper = self.op.drbd_helper
3621       if not new_helper:
3622         new_helper = None
3623       if new_helper != self.cfg.GetDRBDHelper():
3624         self.cfg.SetDRBDHelper(new_helper)
3625       else:
3626         feedback_fn("Cluster DRBD helper already in desired state,"
3627                     " not changing")
3628     if self.op.hvparams:
3629       self.cluster.hvparams = self.new_hvparams
3630     if self.op.os_hvp:
3631       self.cluster.os_hvp = self.new_os_hvp
3632     if self.op.enabled_hypervisors is not None:
3633       self.cluster.hvparams = self.new_hvparams
3634       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3635     if self.op.beparams:
3636       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3637     if self.op.nicparams:
3638       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3639     if self.op.osparams:
3640       self.cluster.osparams = self.new_osp
3641     if self.op.ndparams:
3642       self.cluster.ndparams = self.new_ndparams
3643
3644     if self.op.candidate_pool_size is not None:
3645       self.cluster.candidate_pool_size = self.op.candidate_pool_size
3646       # we need to update the pool size here, otherwise the save will fail
3647       _AdjustCandidatePool(self, [])
3648
3649     if self.op.maintain_node_health is not None:
3650       self.cluster.maintain_node_health = self.op.maintain_node_health
3651
3652     if self.op.prealloc_wipe_disks is not None:
3653       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3654
3655     if self.op.add_uids is not None:
3656       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3657
3658     if self.op.remove_uids is not None:
3659       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3660
3661     if self.op.uid_pool is not None:
3662       self.cluster.uid_pool = self.op.uid_pool
3663
3664     if self.op.default_iallocator is not None:
3665       self.cluster.default_iallocator = self.op.default_iallocator
3666
3667     if self.op.reserved_lvs is not None:
3668       self.cluster.reserved_lvs = self.op.reserved_lvs
3669
3670     def helper_os(aname, mods, desc):
3671       desc += " OS list"
3672       lst = getattr(self.cluster, aname)
3673       for key, val in mods:
3674         if key == constants.DDM_ADD:
3675           if val in lst:
3676             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3677           else:
3678             lst.append(val)
3679         elif key == constants.DDM_REMOVE:
3680           if val in lst:
3681             lst.remove(val)
3682           else:
3683             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3684         else:
3685           raise errors.ProgrammerError("Invalid modification '%s'" % key)
3686
3687     if self.op.hidden_os:
3688       helper_os("hidden_os", self.op.hidden_os, "hidden")
3689
3690     if self.op.blacklisted_os:
3691       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3692
3693     if self.op.master_netdev:
3694       master = self.cfg.GetMasterNode()
3695       feedback_fn("Shutting down master ip on the current netdev (%s)" %
3696                   self.cluster.master_netdev)
3697       result = self.rpc.call_node_stop_master(master, False)
3698       result.Raise("Could not disable the master ip")
3699       feedback_fn("Changing master_netdev from %s to %s" %
3700                   (self.cluster.master_netdev, self.op.master_netdev))
3701       self.cluster.master_netdev = self.op.master_netdev
3702
3703     self.cfg.Update(self.cluster, feedback_fn)
3704
3705     if self.op.master_netdev:
3706       feedback_fn("Starting the master ip on the new master netdev (%s)" %
3707                   self.op.master_netdev)
3708       result = self.rpc.call_node_start_master(master, False, False)
3709       if result.fail_msg:
3710         self.LogWarning("Could not re-enable the master ip on"
3711                         " the master, please restart manually: %s",
3712                         result.fail_msg)
3713
3714
3715 def _UploadHelper(lu, nodes, fname):
3716   """Helper for uploading a file and showing warnings.
3717
3718   """
3719   if os.path.exists(fname):
3720     result = lu.rpc.call_upload_file(nodes, fname)
3721     for to_node, to_result in result.items():
3722       msg = to_result.fail_msg
3723       if msg:
3724         msg = ("Copy of file %s to node %s failed: %s" %
3725                (fname, to_node, msg))
3726         lu.proc.LogWarning(msg)
3727
3728
3729 def _ComputeAncillaryFiles(cluster, redist):
3730   """Compute files external to Ganeti which need to be consistent.
3731
3732   @type redist: boolean
3733   @param redist: Whether to include files which need to be redistributed
3734
3735   """
3736   # Compute files for all nodes
3737   files_all = set([
3738     constants.SSH_KNOWN_HOSTS_FILE,
3739     constants.CONFD_HMAC_KEY,
3740     constants.CLUSTER_DOMAIN_SECRET_FILE,
3741     ])
3742
3743   if not redist:
3744     files_all.update(constants.ALL_CERT_FILES)
3745     files_all.update(ssconf.SimpleStore().GetFileList())
3746   else:
3747     # we need to ship at least the RAPI certificate
3748     files_all.add(constants.RAPI_CERT_FILE)
3749
3750   if cluster.modify_etc_hosts:
3751     files_all.add(constants.ETC_HOSTS)
3752
3753   # Files which must either exist on all nodes or on none
3754   files_all_opt = set([
3755     constants.RAPI_USERS_FILE,
3756     ])
3757
3758   # Files which should only be on master candidates
3759   files_mc = set()
3760   if not redist:
3761     files_mc.add(constants.CLUSTER_CONF_FILE)
3762
3763   # Files which should only be on VM-capable nodes
3764   files_vm = set(filename
3765     for hv_name in cluster.enabled_hypervisors
3766     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3767
3768   # Filenames must be unique
3769   assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3770           sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3771          "Found file listed in more than one file list"
3772
3773   return (files_all, files_all_opt, files_mc, files_vm)
3774
3775
3776 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3777   """Distribute additional files which are part of the cluster configuration.
3778
3779   ConfigWriter takes care of distributing the config and ssconf files, but
3780   there are more files which should be distributed to all nodes. This function
3781   makes sure those are copied.
3782
3783   @param lu: calling logical unit
3784   @param additional_nodes: list of nodes not in the config to distribute to
3785   @type additional_vm: boolean
3786   @param additional_vm: whether the additional nodes are vm-capable or not
3787
3788   """
3789   # Gather target nodes
3790   cluster = lu.cfg.GetClusterInfo()
3791   master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3792
3793   online_nodes = lu.cfg.GetOnlineNodeList()
3794   vm_nodes = lu.cfg.GetVmCapableNodeList()
3795
3796   if additional_nodes is not None:
3797     online_nodes.extend(additional_nodes)
3798     if additional_vm:
3799       vm_nodes.extend(additional_nodes)
3800
3801   # Never distribute to master node
3802   for nodelist in [online_nodes, vm_nodes]:
3803     if master_info.name in nodelist:
3804       nodelist.remove(master_info.name)
3805
3806   # Gather file lists
3807   (files_all, files_all_opt, files_mc, files_vm) = \
3808     _ComputeAncillaryFiles(cluster, True)
3809
3810   # Never re-distribute configuration file from here
3811   assert not (constants.CLUSTER_CONF_FILE in files_all or
3812               constants.CLUSTER_CONF_FILE in files_vm)
3813   assert not files_mc, "Master candidates not handled in this function"
3814
3815   filemap = [
3816     (online_nodes, files_all),
3817     (online_nodes, files_all_opt),
3818     (vm_nodes, files_vm),
3819     ]
3820
3821   # Upload the files
3822   for (node_list, files) in filemap:
3823     for fname in files:
3824       _UploadHelper(lu, node_list, fname)
3825
3826
3827 class LUClusterRedistConf(NoHooksLU):
3828   """Force the redistribution of cluster configuration.
3829
3830   This is a very simple LU.
3831
3832   """
3833   REQ_BGL = False
3834
3835   def ExpandNames(self):
3836     self.needed_locks = {
3837       locking.LEVEL_NODE: locking.ALL_SET,
3838     }
3839     self.share_locks[locking.LEVEL_NODE] = 1
3840
3841   def Exec(self, feedback_fn):
3842     """Redistribute the configuration.
3843
3844     """
3845     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3846     _RedistributeAncillaryFiles(self)
3847
3848
3849 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3850   """Sleep and poll for an instance's disk to sync.
3851
3852   """
3853   if not instance.disks or disks is not None and not disks:
3854     return True
3855
3856   disks = _ExpandCheckDisks(instance, disks)
3857
3858   if not oneshot:
3859     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3860
3861   node = instance.primary_node
3862
3863   for dev in disks:
3864     lu.cfg.SetDiskID(dev, node)
3865
3866   # TODO: Convert to utils.Retry
3867
3868   retries = 0
3869   degr_retries = 10 # in seconds, as we sleep 1 second each time
3870   while True:
3871     max_time = 0
3872     done = True
3873     cumul_degraded = False
3874     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3875     msg = rstats.fail_msg
3876     if msg:
3877       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3878       retries += 1
3879       if retries >= 10:
3880         raise errors.RemoteError("Can't contact node %s for mirror data,"
3881                                  " aborting." % node)
3882       time.sleep(6)
3883       continue
3884     rstats = rstats.payload
3885     retries = 0
3886     for i, mstat in enumerate(rstats):
3887       if mstat is None:
3888         lu.LogWarning("Can't compute data for node %s/%s",
3889                            node, disks[i].iv_name)
3890         continue
3891
3892       cumul_degraded = (cumul_degraded or
3893                         (mstat.is_degraded and mstat.sync_percent is None))
3894       if mstat.sync_percent is not None:
3895         done = False
3896         if mstat.estimated_time is not None:
3897           rem_time = ("%s remaining (estimated)" %
3898                       utils.FormatSeconds(mstat.estimated_time))
3899           max_time = mstat.estimated_time
3900         else:
3901           rem_time = "no time estimate"
3902         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3903                         (disks[i].iv_name, mstat.sync_percent, rem_time))
3904
3905     # if we're done but degraded, let's do a few small retries, to
3906     # make sure we see a stable and not transient situation; therefore
3907     # we force restart of the loop
3908     if (done or oneshot) and cumul_degraded and degr_retries > 0:
3909       logging.info("Degraded disks found, %d retries left", degr_retries)
3910       degr_retries -= 1
3911       time.sleep(1)
3912       continue
3913
3914     if done or oneshot:
3915       break
3916
3917     time.sleep(min(60, max_time))
3918
3919   if done:
3920     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3921   return not cumul_degraded
3922
3923
3924 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3925   """Check that mirrors are not degraded.
3926
3927   The ldisk parameter, if True, will change the test from the
3928   is_degraded attribute (which represents overall non-ok status for
3929   the device(s)) to the ldisk (representing the local storage status).
3930
3931   """
3932   lu.cfg.SetDiskID(dev, node)
3933
3934   result = True
3935
3936   if on_primary or dev.AssembleOnSecondary():
3937     rstats = lu.rpc.call_blockdev_find(node, dev)
3938     msg = rstats.fail_msg
3939     if msg:
3940       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3941       result = False
3942     elif not rstats.payload:
3943       lu.LogWarning("Can't find disk on node %s", node)
3944       result = False
3945     else:
3946       if ldisk:
3947         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3948       else:
3949         result = result and not rstats.payload.is_degraded
3950
3951   if dev.children:
3952     for child in dev.children:
3953       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3954
3955   return result
3956
3957
3958 class LUOobCommand(NoHooksLU):
3959   """Logical unit for OOB handling.
3960
3961   """
3962   REG_BGL = False
3963   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3964
3965   def ExpandNames(self):
3966     """Gather locks we need.
3967
3968     """
3969     if self.op.node_names:
3970       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3971       lock_names = self.op.node_names
3972     else:
3973       lock_names = locking.ALL_SET
3974
3975     self.needed_locks = {
3976       locking.LEVEL_NODE: lock_names,
3977       }
3978
3979   def CheckPrereq(self):
3980     """Check prerequisites.
3981
3982     This checks:
3983      - the node exists in the configuration
3984      - OOB is supported
3985
3986     Any errors are signaled by raising errors.OpPrereqError.
3987
3988     """
3989     self.nodes = []
3990     self.master_node = self.cfg.GetMasterNode()
3991
3992     assert self.op.power_delay >= 0.0
3993
3994     if self.op.node_names:
3995       if (self.op.command in self._SKIP_MASTER and
3996           self.master_node in self.op.node_names):
3997         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3998         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3999
4000         if master_oob_handler:
4001           additional_text = ("run '%s %s %s' if you want to operate on the"
4002                              " master regardless") % (master_oob_handler,
4003                                                       self.op.command,
4004                                                       self.master_node)
4005         else:
4006           additional_text = "it does not support out-of-band operations"
4007
4008         raise errors.OpPrereqError(("Operating on the master node %s is not"
4009                                     " allowed for %s; %s") %
4010                                    (self.master_node, self.op.command,
4011                                     additional_text), errors.ECODE_INVAL)
4012     else:
4013       self.op.node_names = self.cfg.GetNodeList()
4014       if self.op.command in self._SKIP_MASTER:
4015         self.op.node_names.remove(self.master_node)
4016
4017     if self.op.command in self._SKIP_MASTER:
4018       assert self.master_node not in self.op.node_names
4019
4020     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4021       if node is None:
4022         raise errors.OpPrereqError("Node %s not found" % node_name,
4023                                    errors.ECODE_NOENT)
4024       else:
4025         self.nodes.append(node)
4026
4027       if (not self.op.ignore_status and
4028           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4029         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4030                                     " not marked offline") % node_name,
4031                                    errors.ECODE_STATE)
4032
4033   def Exec(self, feedback_fn):
4034     """Execute OOB and return result if we expect any.
4035
4036     """
4037     master_node = self.master_node
4038     ret = []
4039
4040     for idx, node in enumerate(utils.NiceSort(self.nodes,
4041                                               key=lambda node: node.name)):
4042       node_entry = [(constants.RS_NORMAL, node.name)]
4043       ret.append(node_entry)
4044
4045       oob_program = _SupportsOob(self.cfg, node)
4046
4047       if not oob_program:
4048         node_entry.append((constants.RS_UNAVAIL, None))
4049         continue
4050
4051       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4052                    self.op.command, oob_program, node.name)
4053       result = self.rpc.call_run_oob(master_node, oob_program,
4054                                      self.op.command, node.name,
4055                                      self.op.timeout)
4056
4057       if result.fail_msg:
4058         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4059                         node.name, result.fail_msg)
4060         node_entry.append((constants.RS_NODATA, None))
4061       else:
4062         try:
4063           self._CheckPayload(result)
4064         except errors.OpExecError, err:
4065           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4066                           node.name, err)
4067           node_entry.append((constants.RS_NODATA, None))
4068         else:
4069           if self.op.command == constants.OOB_HEALTH:
4070             # For health we should log important events
4071             for item, status in result.payload:
4072               if status in [constants.OOB_STATUS_WARNING,
4073                             constants.OOB_STATUS_CRITICAL]:
4074                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4075                                 item, node.name, status)
4076
4077           if self.op.command == constants.OOB_POWER_ON:
4078             node.powered = True
4079           elif self.op.command == constants.OOB_POWER_OFF:
4080             node.powered = False
4081           elif self.op.command == constants.OOB_POWER_STATUS:
4082             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4083             if powered != node.powered:
4084               logging.warning(("Recorded power state (%s) of node '%s' does not"
4085                                " match actual power state (%s)"), node.powered,
4086                               node.name, powered)
4087
4088           # For configuration changing commands we should update the node
4089           if self.op.command in (constants.OOB_POWER_ON,
4090                                  constants.OOB_POWER_OFF):
4091             self.cfg.Update(node, feedback_fn)
4092
4093           node_entry.append((constants.RS_NORMAL, result.payload))
4094
4095           if (self.op.command == constants.OOB_POWER_ON and
4096               idx < len(self.nodes) - 1):
4097             time.sleep(self.op.power_delay)
4098
4099     return ret
4100
4101   def _CheckPayload(self, result):
4102     """Checks if the payload is valid.
4103
4104     @param result: RPC result
4105     @raises errors.OpExecError: If payload is not valid
4106
4107     """
4108     errs = []
4109     if self.op.command == constants.OOB_HEALTH:
4110       if not isinstance(result.payload, list):
4111         errs.append("command 'health' is expected to return a list but got %s" %
4112                     type(result.payload))
4113       else:
4114         for item, status in result.payload:
4115           if status not in constants.OOB_STATUSES:
4116             errs.append("health item '%s' has invalid status '%s'" %
4117                         (item, status))
4118
4119     if self.op.command == constants.OOB_POWER_STATUS:
4120       if not isinstance(result.payload, dict):
4121         errs.append("power-status is expected to return a dict but got %s" %
4122                     type(result.payload))
4123
4124     if self.op.command in [
4125         constants.OOB_POWER_ON,
4126         constants.OOB_POWER_OFF,
4127         constants.OOB_POWER_CYCLE,
4128         ]:
4129       if result.payload is not None:
4130         errs.append("%s is expected to not return payload but got '%s'" %
4131                     (self.op.command, result.payload))
4132
4133     if errs:
4134       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4135                                utils.CommaJoin(errs))
4136
4137
4138 class _OsQuery(_QueryBase):
4139   FIELDS = query.OS_FIELDS
4140
4141   def ExpandNames(self, lu):
4142     # Lock all nodes in shared mode
4143     # Temporary removal of locks, should be reverted later
4144     # TODO: reintroduce locks when they are lighter-weight
4145     lu.needed_locks = {}
4146     #self.share_locks[locking.LEVEL_NODE] = 1
4147     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4148
4149     # The following variables interact with _QueryBase._GetNames
4150     if self.names:
4151       self.wanted = self.names
4152     else:
4153       self.wanted = locking.ALL_SET
4154
4155     self.do_locking = self.use_locking
4156
4157   def DeclareLocks(self, lu, level):
4158     pass
4159
4160   @staticmethod
4161   def _DiagnoseByOS(rlist):
4162     """Remaps a per-node return list into an a per-os per-node dictionary
4163
4164     @param rlist: a map with node names as keys and OS objects as values
4165
4166     @rtype: dict
4167     @return: a dictionary with osnames as keys and as value another
4168         map, with nodes as keys and tuples of (path, status, diagnose,
4169         variants, parameters, api_versions) as values, eg::
4170
4171           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4172                                      (/srv/..., False, "invalid api")],
4173                            "node2": [(/srv/..., True, "", [], [])]}
4174           }
4175
4176     """
4177     all_os = {}
4178     # we build here the list of nodes that didn't fail the RPC (at RPC
4179     # level), so that nodes with a non-responding node daemon don't
4180     # make all OSes invalid
4181     good_nodes = [node_name for node_name in rlist
4182                   if not rlist[node_name].fail_msg]
4183     for node_name, nr in rlist.items():
4184       if nr.fail_msg or not nr.payload:
4185         continue
4186       for (name, path, status, diagnose, variants,
4187            params, api_versions) in nr.payload:
4188         if name not in all_os:
4189           # build a list of nodes for this os containing empty lists
4190           # for each node in node_list
4191           all_os[name] = {}
4192           for nname in good_nodes:
4193             all_os[name][nname] = []
4194         # convert params from [name, help] to (name, help)
4195         params = [tuple(v) for v in params]
4196         all_os[name][node_name].append((path, status, diagnose,
4197                                         variants, params, api_versions))
4198     return all_os
4199
4200   def _GetQueryData(self, lu):
4201     """Computes the list of nodes and their attributes.
4202
4203     """
4204     # Locking is not used
4205     assert not (compat.any(lu.glm.is_owned(level)
4206                            for level in locking.LEVELS
4207                            if level != locking.LEVEL_CLUSTER) or
4208                 self.do_locking or self.use_locking)
4209
4210     valid_nodes = [node.name
4211                    for node in lu.cfg.GetAllNodesInfo().values()
4212                    if not node.offline and node.vm_capable]
4213     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4214     cluster = lu.cfg.GetClusterInfo()
4215
4216     data = {}
4217
4218     for (os_name, os_data) in pol.items():
4219       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4220                           hidden=(os_name in cluster.hidden_os),
4221                           blacklisted=(os_name in cluster.blacklisted_os))
4222
4223       variants = set()
4224       parameters = set()
4225       api_versions = set()
4226
4227       for idx, osl in enumerate(os_data.values()):
4228         info.valid = bool(info.valid and osl and osl[0][1])
4229         if not info.valid:
4230           break
4231
4232         (node_variants, node_params, node_api) = osl[0][3:6]
4233         if idx == 0:
4234           # First entry
4235           variants.update(node_variants)
4236           parameters.update(node_params)
4237           api_versions.update(node_api)
4238         else:
4239           # Filter out inconsistent values
4240           variants.intersection_update(node_variants)
4241           parameters.intersection_update(node_params)
4242           api_versions.intersection_update(node_api)
4243
4244       info.variants = list(variants)
4245       info.parameters = list(parameters)
4246       info.api_versions = list(api_versions)
4247
4248       data[os_name] = info
4249
4250     # Prepare data in requested order
4251     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4252             if name in data]
4253
4254
4255 class LUOsDiagnose(NoHooksLU):
4256   """Logical unit for OS diagnose/query.
4257
4258   """
4259   REQ_BGL = False
4260
4261   @staticmethod
4262   def _BuildFilter(fields, names):
4263     """Builds a filter for querying OSes.
4264
4265     """
4266     name_filter = qlang.MakeSimpleFilter("name", names)
4267
4268     # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4269     # respective field is not requested
4270     status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4271                      for fname in ["hidden", "blacklisted"]
4272                      if fname not in fields]
4273     if "valid" not in fields:
4274       status_filter.append([qlang.OP_TRUE, "valid"])
4275
4276     if status_filter:
4277       status_filter.insert(0, qlang.OP_AND)
4278     else:
4279       status_filter = None
4280
4281     if name_filter and status_filter:
4282       return [qlang.OP_AND, name_filter, status_filter]
4283     elif name_filter:
4284       return name_filter
4285     else:
4286       return status_filter
4287
4288   def CheckArguments(self):
4289     self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4290                        self.op.output_fields, False)
4291
4292   def ExpandNames(self):
4293     self.oq.ExpandNames(self)
4294
4295   def Exec(self, feedback_fn):
4296     return self.oq.OldStyleQuery(self)
4297
4298
4299 class LUNodeRemove(LogicalUnit):
4300   """Logical unit for removing a node.
4301
4302   """
4303   HPATH = "node-remove"
4304   HTYPE = constants.HTYPE_NODE
4305
4306   def BuildHooksEnv(self):
4307     """Build hooks env.
4308
4309     This doesn't run on the target node in the pre phase as a failed
4310     node would then be impossible to remove.
4311
4312     """
4313     return {
4314       "OP_TARGET": self.op.node_name,
4315       "NODE_NAME": self.op.node_name,
4316       }
4317
4318   def BuildHooksNodes(self):
4319     """Build hooks nodes.
4320
4321     """
4322     all_nodes = self.cfg.GetNodeList()
4323     try:
4324       all_nodes.remove(self.op.node_name)
4325     except ValueError:
4326       logging.warning("Node '%s', which is about to be removed, was not found"
4327                       " in the list of all nodes", self.op.node_name)
4328     return (all_nodes, all_nodes)
4329
4330   def CheckPrereq(self):
4331     """Check prerequisites.
4332
4333     This checks:
4334      - the node exists in the configuration
4335      - it does not have primary or secondary instances
4336      - it's not the master
4337
4338     Any errors are signaled by raising errors.OpPrereqError.
4339
4340     """
4341     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4342     node = self.cfg.GetNodeInfo(self.op.node_name)
4343     assert node is not None
4344
4345     masternode = self.cfg.GetMasterNode()
4346     if node.name == masternode:
4347       raise errors.OpPrereqError("Node is the master node, failover to another"
4348                                  " node is required", errors.ECODE_INVAL)
4349
4350     for instance_name, instance in self.cfg.GetAllInstancesInfo():
4351       if node.name in instance.all_nodes:
4352         raise errors.OpPrereqError("Instance %s is still running on the node,"
4353                                    " please remove first" % instance_name,
4354                                    errors.ECODE_INVAL)
4355     self.op.node_name = node.name
4356     self.node = node
4357
4358   def Exec(self, feedback_fn):
4359     """Removes the node from the cluster.
4360
4361     """
4362     node = self.node
4363     logging.info("Stopping the node daemon and removing configs from node %s",
4364                  node.name)
4365
4366     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4367
4368     # Promote nodes to master candidate as needed
4369     _AdjustCandidatePool(self, exceptions=[node.name])
4370     self.context.RemoveNode(node.name)
4371
4372     # Run post hooks on the node before it's removed
4373     _RunPostHook(self, node.name)
4374
4375     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4376     msg = result.fail_msg
4377     if msg:
4378       self.LogWarning("Errors encountered on the remote node while leaving"
4379                       " the cluster: %s", msg)
4380
4381     # Remove node from our /etc/hosts
4382     if self.cfg.GetClusterInfo().modify_etc_hosts:
4383       master_node = self.cfg.GetMasterNode()
4384       result = self.rpc.call_etc_hosts_modify(master_node,
4385                                               constants.ETC_HOSTS_REMOVE,
4386                                               node.name, None)
4387       result.Raise("Can't update hosts file with new host data")
4388       _RedistributeAncillaryFiles(self)
4389
4390
4391 class _NodeQuery(_QueryBase):
4392   FIELDS = query.NODE_FIELDS
4393
4394   def ExpandNames(self, lu):
4395     lu.needed_locks = {}
4396     lu.share_locks = _ShareAll()
4397
4398     if self.names:
4399       self.wanted = _GetWantedNodes(lu, self.names)
4400     else:
4401       self.wanted = locking.ALL_SET
4402
4403     self.do_locking = (self.use_locking and
4404                        query.NQ_LIVE in self.requested_data)
4405
4406     if self.do_locking:
4407       # If any non-static field is requested we need to lock the nodes
4408       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4409
4410   def DeclareLocks(self, lu, level):
4411     pass
4412
4413   def _GetQueryData(self, lu):
4414     """Computes the list of nodes and their attributes.
4415
4416     """
4417     all_info = lu.cfg.GetAllNodesInfo()
4418
4419     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4420
4421     # Gather data as requested
4422     if query.NQ_LIVE in self.requested_data:
4423       # filter out non-vm_capable nodes
4424       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4425
4426       node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4427                                         lu.cfg.GetHypervisorType())
4428       live_data = dict((name, nresult.payload)
4429                        for (name, nresult) in node_data.items()
4430                        if not nresult.fail_msg and nresult.payload)
4431     else:
4432       live_data = None
4433
4434     if query.NQ_INST in self.requested_data:
4435       node_to_primary = dict([(name, set()) for name in nodenames])
4436       node_to_secondary = dict([(name, set()) for name in nodenames])
4437
4438       inst_data = lu.cfg.GetAllInstancesInfo()
4439
4440       for inst in inst_data.values():
4441         if inst.primary_node in node_to_primary:
4442           node_to_primary[inst.primary_node].add(inst.name)
4443         for secnode in inst.secondary_nodes:
4444           if secnode in node_to_secondary:
4445             node_to_secondary[secnode].add(inst.name)
4446     else:
4447       node_to_primary = None
4448       node_to_secondary = None
4449
4450     if query.NQ_OOB in self.requested_data:
4451       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4452                          for name, node in all_info.iteritems())
4453     else:
4454       oob_support = None
4455
4456     if query.NQ_GROUP in self.requested_data:
4457       groups = lu.cfg.GetAllNodeGroupsInfo()
4458     else:
4459       groups = {}
4460
4461     return query.NodeQueryData([all_info[name] for name in nodenames],
4462                                live_data, lu.cfg.GetMasterNode(),
4463                                node_to_primary, node_to_secondary, groups,
4464                                oob_support, lu.cfg.GetClusterInfo())
4465
4466
4467 class LUNodeQuery(NoHooksLU):
4468   """Logical unit for querying nodes.
4469
4470   """
4471   # pylint: disable=W0142
4472   REQ_BGL = False
4473
4474   def CheckArguments(self):
4475     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4476                          self.op.output_fields, self.op.use_locking)
4477
4478   def ExpandNames(self):
4479     self.nq.ExpandNames(self)
4480
4481   def Exec(self, feedback_fn):
4482     return self.nq.OldStyleQuery(self)
4483
4484
4485 class LUNodeQueryvols(NoHooksLU):
4486   """Logical unit for getting volumes on node(s).
4487
4488   """
4489   REQ_BGL = False
4490   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4491   _FIELDS_STATIC = utils.FieldSet("node")
4492
4493   def CheckArguments(self):
4494     _CheckOutputFields(static=self._FIELDS_STATIC,
4495                        dynamic=self._FIELDS_DYNAMIC,
4496                        selected=self.op.output_fields)
4497
4498   def ExpandNames(self):
4499     self.needed_locks = {}
4500     self.share_locks[locking.LEVEL_NODE] = 1
4501     if not self.op.nodes:
4502       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4503     else:
4504       self.needed_locks[locking.LEVEL_NODE] = \
4505         _GetWantedNodes(self, self.op.nodes)
4506
4507   def Exec(self, feedback_fn):
4508     """Computes the list of nodes and their attributes.
4509
4510     """
4511     nodenames = self.owned_locks(locking.LEVEL_NODE)
4512     volumes = self.rpc.call_node_volumes(nodenames)
4513
4514     ilist = self.cfg.GetAllInstancesInfo()
4515     vol2inst = _MapInstanceDisksToNodes(ilist.values())
4516
4517     output = []
4518     for node in nodenames:
4519       nresult = volumes[node]
4520       if nresult.offline:
4521         continue
4522       msg = nresult.fail_msg
4523       if msg:
4524         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4525         continue
4526
4527       node_vols = sorted(nresult.payload,
4528                          key=operator.itemgetter("dev"))
4529
4530       for vol in node_vols:
4531         node_output = []
4532         for field in self.op.output_fields:
4533           if field == "node":
4534             val = node
4535           elif field == "phys":
4536             val = vol["dev"]
4537           elif field == "vg":
4538             val = vol["vg"]
4539           elif field == "name":
4540             val = vol["name"]
4541           elif field == "size":
4542             val = int(float(vol["size"]))
4543           elif field == "instance":
4544             val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4545           else:
4546             raise errors.ParameterError(field)
4547           node_output.append(str(val))
4548
4549         output.append(node_output)
4550
4551     return output
4552
4553
4554 class LUNodeQueryStorage(NoHooksLU):
4555   """Logical unit for getting information on storage units on node(s).
4556
4557   """
4558   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4559   REQ_BGL = False
4560
4561   def CheckArguments(self):
4562     _CheckOutputFields(static=self._FIELDS_STATIC,
4563                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4564                        selected=self.op.output_fields)
4565
4566   def ExpandNames(self):
4567     self.needed_locks = {}
4568     self.share_locks[locking.LEVEL_NODE] = 1
4569
4570     if self.op.nodes:
4571       self.needed_locks[locking.LEVEL_NODE] = \
4572         _GetWantedNodes(self, self.op.nodes)
4573     else:
4574       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4575
4576   def Exec(self, feedback_fn):
4577     """Computes the list of nodes and their attributes.
4578
4579     """
4580     self.nodes = self.owned_locks(locking.LEVEL_NODE)
4581
4582     # Always get name to sort by
4583     if constants.SF_NAME in self.op.output_fields:
4584       fields = self.op.output_fields[:]
4585     else:
4586       fields = [constants.SF_NAME] + self.op.output_fields
4587
4588     # Never ask for node or type as it's only known to the LU
4589     for extra in [constants.SF_NODE, constants.SF_TYPE]:
4590       while extra in fields:
4591         fields.remove(extra)
4592
4593     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4594     name_idx = field_idx[constants.SF_NAME]
4595
4596     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4597     data = self.rpc.call_storage_list(self.nodes,
4598                                       self.op.storage_type, st_args,
4599                                       self.op.name, fields)
4600
4601     result = []
4602
4603     for node in utils.NiceSort(self.nodes):
4604       nresult = data[node]
4605       if nresult.offline:
4606         continue
4607
4608       msg = nresult.fail_msg
4609       if msg:
4610         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4611         continue
4612
4613       rows = dict([(row[name_idx], row) for row in nresult.payload])
4614
4615       for name in utils.NiceSort(rows.keys()):
4616         row = rows[name]
4617
4618         out = []
4619
4620         for field in self.op.output_fields:
4621           if field == constants.SF_NODE:
4622             val = node
4623           elif field == constants.SF_TYPE:
4624             val = self.op.storage_type
4625           elif field in field_idx:
4626             val = row[field_idx[field]]
4627           else:
4628             raise errors.ParameterError(field)
4629
4630           out.append(val)
4631
4632         result.append(out)
4633
4634     return result
4635
4636
4637 class _InstanceQuery(_QueryBase):
4638   FIELDS = query.INSTANCE_FIELDS
4639
4640   def ExpandNames(self, lu):
4641     lu.needed_locks = {}
4642     lu.share_locks = _ShareAll()
4643
4644     if self.names:
4645       self.wanted = _GetWantedInstances(lu, self.names)
4646     else:
4647       self.wanted = locking.ALL_SET
4648
4649     self.do_locking = (self.use_locking and
4650                        query.IQ_LIVE in self.requested_data)
4651     if self.do_locking:
4652       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4653       lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4654       lu.needed_locks[locking.LEVEL_NODE] = []
4655       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4656
4657     self.do_grouplocks = (self.do_locking and
4658                           query.IQ_NODES in self.requested_data)
4659
4660   def DeclareLocks(self, lu, level):
4661     if self.do_locking:
4662       if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4663         assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4664
4665         # Lock all groups used by instances optimistically; this requires going
4666         # via the node before it's locked, requiring verification later on
4667         lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4668           set(group_uuid
4669               for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4670               for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4671       elif level == locking.LEVEL_NODE:
4672         lu._LockInstancesNodes() # pylint: disable=W0212
4673
4674   @staticmethod
4675   def _CheckGroupLocks(lu):
4676     owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4677     owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4678
4679     # Check if node groups for locked instances are still correct
4680     for instance_name in owned_instances:
4681       _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4682
4683   def _GetQueryData(self, lu):
4684     """Computes the list of instances and their attributes.
4685
4686     """
4687     if self.do_grouplocks:
4688       self._CheckGroupLocks(lu)
4689
4690     cluster = lu.cfg.GetClusterInfo()
4691     all_info = lu.cfg.GetAllInstancesInfo()
4692
4693     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4694
4695     instance_list = [all_info[name] for name in instance_names]
4696     nodes = frozenset(itertools.chain(*(inst.all_nodes
4697                                         for inst in instance_list)))
4698     hv_list = list(set([inst.hypervisor for inst in instance_list]))
4699     bad_nodes = []
4700     offline_nodes = []
4701     wrongnode_inst = set()
4702
4703     # Gather data as requested
4704     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4705       live_data = {}
4706       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4707       for name in nodes:
4708         result = node_data[name]
4709         if result.offline:
4710           # offline nodes will be in both lists
4711           assert result.fail_msg
4712           offline_nodes.append(name)
4713         if result.fail_msg:
4714           bad_nodes.append(name)
4715         elif result.payload:
4716           for inst in result.payload:
4717             if inst in all_info:
4718               if all_info[inst].primary_node == name:
4719                 live_data.update(result.payload)
4720               else:
4721                 wrongnode_inst.add(inst)
4722             else:
4723               # orphan instance; we don't list it here as we don't
4724               # handle this case yet in the output of instance listing
4725               logging.warning("Orphan instance '%s' found on node %s",
4726                               inst, name)
4727         # else no instance is alive
4728     else:
4729       live_data = {}
4730
4731     if query.IQ_DISKUSAGE in self.requested_data:
4732       disk_usage = dict((inst.name,
4733                          _ComputeDiskSize(inst.disk_template,
4734                                           [{constants.IDISK_SIZE: disk.size}
4735                                            for disk in inst.disks]))
4736                         for inst in instance_list)
4737     else:
4738       disk_usage = None
4739
4740     if query.IQ_CONSOLE in self.requested_data:
4741       consinfo = {}
4742       for inst in instance_list:
4743         if inst.name in live_data:
4744           # Instance is running
4745           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4746         else:
4747           consinfo[inst.name] = None
4748       assert set(consinfo.keys()) == set(instance_names)
4749     else:
4750       consinfo = None
4751
4752     if query.IQ_NODES in self.requested_data:
4753       node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4754                                             instance_list)))
4755       nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4756       groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4757                     for uuid in set(map(operator.attrgetter("group"),
4758                                         nodes.values())))
4759     else:
4760       nodes = None
4761       groups = None
4762
4763     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4764                                    disk_usage, offline_nodes, bad_nodes,
4765                                    live_data, wrongnode_inst, consinfo,
4766                                    nodes, groups)
4767
4768
4769 class LUQuery(NoHooksLU):
4770   """Query for resources/items of a certain kind.
4771
4772   """
4773   # pylint: disable=W0142
4774   REQ_BGL = False
4775
4776   def CheckArguments(self):
4777     qcls = _GetQueryImplementation(self.op.what)
4778
4779     self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)
4780
4781   def ExpandNames(self):
4782     self.impl.ExpandNames(self)
4783
4784   def DeclareLocks(self, level):
4785     self.impl.DeclareLocks(self, level)
4786
4787   def Exec(self, feedback_fn):
4788     return self.impl.NewStyleQuery(self)
4789
4790
4791 class LUQueryFields(NoHooksLU):
4792   """Query for resources/items of a certain kind.
4793
4794   """
4795   # pylint: disable=W0142
4796   REQ_BGL = False
4797
4798   def CheckArguments(self):
4799     self.qcls = _GetQueryImplementation(self.op.what)
4800
4801   def ExpandNames(self):
4802     self.needed_locks = {}
4803
4804   def Exec(self, feedback_fn):
4805     return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4806
4807
4808 class LUNodeModifyStorage(NoHooksLU):
4809   """Logical unit for modifying a storage volume on a node.
4810
4811   """
4812   REQ_BGL = False
4813
4814   def CheckArguments(self):
4815     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4816
4817     storage_type = self.op.storage_type
4818
4819     try:
4820       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4821     except KeyError:
4822       raise errors.OpPrereqError("Storage units of type '%s' can not be"
4823                                  " modified" % storage_type,
4824                                  errors.ECODE_INVAL)
4825
4826     diff = set(self.op.changes.keys()) - modifiable
4827     if diff:
4828       raise errors.OpPrereqError("The following fields can not be modified for"
4829                                  " storage units of type '%s': %r" %
4830                                  (storage_type, list(diff)),
4831                                  errors.ECODE_INVAL)
4832
4833   def ExpandNames(self):
4834     self.needed_locks = {
4835       locking.LEVEL_NODE: self.op.node_name,
4836       }
4837
4838   def Exec(self, feedback_fn):
4839     """Computes the list of nodes and their attributes.
4840
4841     """
4842     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4843     result = self.rpc.call_storage_modify(self.op.node_name,
4844                                           self.op.storage_type, st_args,
4845                                           self.op.name, self.op.changes)
4846     result.Raise("Failed to modify storage unit '%s' on %s" %
4847                  (self.op.name, self.op.node_name))
4848
4849
4850 class LUNodeAdd(LogicalUnit):
4851   """Logical unit for adding node to the cluster.
4852
4853   """
4854   HPATH = "node-add"
4855   HTYPE = constants.HTYPE_NODE
4856   _NFLAGS = ["master_capable", "vm_capable"]
4857
4858   def CheckArguments(self):
4859     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4860     # validate/normalize the node name
4861     self.hostname = netutils.GetHostname(name=self.op.node_name,
4862                                          family=self.primary_ip_family)
4863     self.op.node_name = self.hostname.name
4864
4865     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4866       raise errors.OpPrereqError("Cannot readd the master node",
4867                                  errors.ECODE_STATE)
4868
4869     if self.op.readd and self.op.group:
4870       raise errors.OpPrereqError("Cannot pass a node group when a node is"
4871                                  " being readded", errors.ECODE_INVAL)
4872
4873   def BuildHooksEnv(self):
4874     """Build hooks env.
4875
4876     This will run on all nodes before, and on all nodes + the new node after.
4877
4878     """
4879     return {
4880       "OP_TARGET": self.op.node_name,
4881       "NODE_NAME": self.op.node_name,
4882       "NODE_PIP": self.op.primary_ip,
4883       "NODE_SIP": self.op.secondary_ip,
4884       "MASTER_CAPABLE": str(self.op.master_capable),
4885       "VM_CAPABLE": str(self.op.vm_capable),
4886       }
4887
4888   def BuildHooksNodes(self):
4889     """Build hooks nodes.
4890
4891     """
4892     # Exclude added node
4893     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4894     post_nodes = pre_nodes + [self.op.node_name, ]
4895
4896     return (pre_nodes, post_nodes)
4897
4898   def CheckPrereq(self):
4899     """Check prerequisites.
4900
4901     This checks:
4902      - the new node is not already in the config
4903      - it is resolvable
4904      - its parameters (single/dual homed) matches the cluster
4905
4906     Any errors are signaled by raising errors.OpPrereqError.
4907
4908     """
4909     cfg = self.cfg
4910     hostname = self.hostname
4911     node = hostname.name
4912     primary_ip = self.op.primary_ip = hostname.ip
4913     if self.op.secondary_ip is None:
4914       if self.primary_ip_family == netutils.IP6Address.family:
4915         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4916                                    " IPv4 address must be given as secondary",
4917                                    errors.ECODE_INVAL)
4918       self.op.secondary_ip = primary_ip
4919
4920     secondary_ip = self.op.secondary_ip
4921     if not netutils.IP4Address.IsValid(secondary_ip):
4922       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4923                                  " address" % secondary_ip, errors.ECODE_INVAL)
4924
4925     node_list = cfg.GetNodeList()
4926     if not self.op.readd and node in node_list:
4927       raise errors.OpPrereqError("Node %s is already in the configuration" %
4928                                  node, errors.ECODE_EXISTS)
4929     elif self.op.readd and node not in node_list:
4930       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4931                                  errors.ECODE_NOENT)
4932
4933     self.changed_primary_ip = False
4934
4935     for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4936       if self.op.readd and node == existing_node_name:
4937         if existing_node.secondary_ip != secondary_ip:
4938           raise errors.OpPrereqError("Readded node doesn't have the same IP"
4939                                      " address configuration as before",
4940                                      errors.ECODE_INVAL)
4941         if existing_node.primary_ip != primary_ip:
4942           self.changed_primary_ip = True
4943
4944         continue
4945
4946       if (existing_node.primary_ip == primary_ip or
4947           existing_node.secondary_ip == primary_ip or
4948           existing_node.primary_ip == secondary_ip or
4949           existing_node.secondary_ip == secondary_ip):
4950         raise errors.OpPrereqError("New node ip address(es) conflict with"
4951                                    " existing node %s" % existing_node.name,
4952                                    errors.ECODE_NOTUNIQUE)
4953
4954     # After this 'if' block, None is no longer a valid value for the
4955     # _capable op attributes
4956     if self.op.readd:
4957       old_node = self.cfg.GetNodeInfo(node)
4958       assert old_node is not None, "Can't retrieve locked node %s" % node
4959       for attr in self._NFLAGS:
4960         if getattr(self.op, attr) is None:
4961           setattr(self.op, attr, getattr(old_node, attr))
4962     else:
4963       for attr in self._NFLAGS:
4964         if getattr(self.op, attr) is None:
4965           setattr(self.op, attr, True)
4966
4967     if self.op.readd and not self.op.vm_capable:
4968       pri, sec = cfg.GetNodeInstances(node)
4969       if pri or sec:
4970         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4971                                    " flag set to false, but it already holds"
4972                                    " instances" % node,
4973                                    errors.ECODE_STATE)
4974
4975     # check that the type of the node (single versus dual homed) is the
4976     # same as for the master
4977     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4978     master_singlehomed = myself.secondary_ip == myself.primary_ip
4979     newbie_singlehomed = secondary_ip == primary_ip
4980     if master_singlehomed != newbie_singlehomed:
4981       if master_singlehomed:
4982         raise errors.OpPrereqError("The master has no secondary ip but the"
4983                                    " new node has one",
4984                                    errors.ECODE_INVAL)
4985       else:
4986         raise errors.OpPrereqError("The master has a secondary ip but the"
4987                                    " new node doesn't have one",
4988                                    errors.ECODE_INVAL)
4989
4990     # checks reachability
4991     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4992       raise errors.OpPrereqError("Node not reachable by ping",
4993                                  errors.ECODE_ENVIRON)
4994
4995     if not newbie_singlehomed:
4996       # check reachability from my secondary ip to newbie's secondary ip
4997       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4998                            source=myself.secondary_ip):
4999         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5000                                    " based ping to node daemon port",
5001                                    errors.ECODE_ENVIRON)
5002
5003     if self.op.readd:
5004       exceptions = [node]
5005     else:
5006       exceptions = []
5007
5008     if self.op.master_capable:
5009       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5010     else:
5011       self.master_candidate = False
5012
5013     if self.op.readd:
5014       self.new_node = old_node
5015     else:
5016       node_group = cfg.LookupNodeGroup(self.op.group)
5017       self.new_node = objects.Node(name=node,
5018                                    primary_ip=primary_ip,
5019                                    secondary_ip=secondary_ip,
5020                                    master_candidate=self.master_candidate,
5021                                    offline=False, drained=False,
5022                                    group=node_group)
5023
5024     if self.op.ndparams:
5025       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5026
5027   def Exec(self, feedback_fn):
5028     """Adds the new node to the cluster.
5029
5030     """
5031     new_node = self.new_node
5032     node = new_node.name
5033
5034     # We adding a new node so we assume it's powered
5035     new_node.powered = True
5036
5037     # for re-adds, reset the offline/drained/master-candidate flags;
5038     # we need to reset here, otherwise offline would prevent RPC calls
5039     # later in the procedure; this also means that if the re-add
5040     # fails, we are left with a non-offlined, broken node
5041     if self.op.readd:
5042       new_node.drained = new_node.offline = False # pylint: disable=W0201
5043       self.LogInfo("Readding a node, the offline/drained flags were reset")
5044       # if we demote the node, we do cleanup later in the procedure
5045       new_node.master_candidate = self.master_candidate
5046       if self.changed_primary_ip:
5047         new_node.primary_ip = self.op.primary_ip
5048
5049     # copy the master/vm_capable flags
5050     for attr in self._NFLAGS:
5051       setattr(new_node, attr, getattr(self.op, attr))
5052
5053     # notify the user about any possible mc promotion
5054     if new_node.master_candidate:
5055       self.LogInfo("Node will be a master candidate")
5056
5057     if self.op.ndparams:
5058       new_node.ndparams = self.op.ndparams
5059     else:
5060       new_node.ndparams = {}
5061
5062     # check connectivity
5063     result = self.rpc.call_version([node])[node]
5064     result.Raise("Can't get version information from node %s" % node)
5065     if constants.PROTOCOL_VERSION == result.payload:
5066       logging.info("Communication to node %s fine, sw version %s match",
5067                    node, result.payload)
5068     else:
5069       raise errors.OpExecError("Version mismatch master version %s,"
5070                                " node version %s" %
5071                                (constants.PROTOCOL_VERSION, result.payload))
5072
5073     # Add node to our /etc/hosts, and add key to known_hosts
5074     if self.cfg.GetClusterInfo().modify_etc_hosts:
5075       master_node = self.cfg.GetMasterNode()
5076       result = self.rpc.call_etc_hosts_modify(master_node,
5077                                               constants.ETC_HOSTS_ADD,
5078                                               self.hostname.name,
5079                                               self.hostname.ip)
5080       result.Raise("Can't update hosts file with new host data")
5081
5082     if new_node.secondary_ip != new_node.primary_ip:
5083       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5084                                False)
5085
5086     node_verify_list = [self.cfg.GetMasterNode()]
5087     node_verify_param = {
5088       constants.NV_NODELIST: ([node], {}),
5089       # TODO: do a node-net-test as well?
5090     }
5091
5092     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5093                                        self.cfg.GetClusterName())
5094     for verifier in node_verify_list:
5095       result[verifier].Raise("Cannot communicate with node %s" % verifier)
5096       nl_payload = result[verifier].payload[constants.NV_NODELIST]
5097       if nl_payload:
5098         for failed in nl_payload:
5099           feedback_fn("ssh/hostname verification failed"
5100                       " (checking from %s): %s" %
5101                       (verifier, nl_payload[failed]))
5102         raise errors.OpExecError("ssh/hostname verification failed")
5103
5104     if self.op.readd:
5105       _RedistributeAncillaryFiles(self)
5106       self.context.ReaddNode(new_node)
5107       # make sure we redistribute the config
5108       self.cfg.Update(new_node, feedback_fn)
5109       # and make sure the new node will not have old files around
5110       if not new_node.master_candidate:
5111         result = self.rpc.call_node_demote_from_mc(new_node.name)
5112         msg = result.fail_msg
5113         if msg:
5114           self.LogWarning("Node failed to demote itself from master"
5115                           " candidate status: %s" % msg)
5116     else:
5117       _RedistributeAncillaryFiles(self, additional_nodes=[node],
5118                                   additional_vm=self.op.vm_capable)
5119       self.context.AddNode(new_node, self.proc.GetECId())
5120
5121
5122 class LUNodeSetParams(LogicalUnit):
5123   """Modifies the parameters of a node.
5124
5125   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5126       to the node role (as _ROLE_*)
5127   @cvar _R2F: a dictionary from node role to tuples of flags
5128   @cvar _FLAGS: a list of attribute names corresponding to the flags
5129
5130   """
5131   HPATH = "node-modify"
5132   HTYPE = constants.HTYPE_NODE
5133   REQ_BGL = False
5134   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5135   _F2R = {
5136     (True, False, False): _ROLE_CANDIDATE,
5137     (False, True, False): _ROLE_DRAINED,
5138     (False, False, True): _ROLE_OFFLINE,
5139     (False, False, False): _ROLE_REGULAR,
5140     }
5141   _R2F = dict((v, k) for k, v in _F2R.items())
5142   _FLAGS = ["master_candidate", "drained", "offline"]
5143
5144   def CheckArguments(self):
5145     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5146     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5147                 self.op.master_capable, self.op.vm_capable,
5148                 self.op.secondary_ip, self.op.ndparams]
5149     if all_mods.count(None) == len(all_mods):
5150       raise errors.OpPrereqError("Please pass at least one modification",
5151                                  errors.ECODE_INVAL)
5152     if all_mods.count(True) > 1:
5153       raise errors.OpPrereqError("Can't set the node into more than one"
5154                                  " state at the same time",
5155                                  errors.ECODE_INVAL)
5156
5157     # Boolean value that tells us whether we might be demoting from MC
5158     self.might_demote = (self.op.master_candidate == False or
5159                          self.op.offline == True or
5160                          self.op.drained == True or
5161                          self.op.master_capable == False)
5162
5163     if self.op.secondary_ip:
5164       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5165         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5166                                    " address" % self.op.secondary_ip,
5167                                    errors.ECODE_INVAL)
5168
5169     self.lock_all = self.op.auto_promote and self.might_demote
5170     self.lock_instances = self.op.secondary_ip is not None
5171
5172   def ExpandNames(self):
5173     if self.lock_all:
5174       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5175     else:
5176       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5177
5178     if self.lock_instances:
5179       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5180
5181   def DeclareLocks(self, level):
5182     # If we have locked all instances, before waiting to lock nodes, release
5183     # all the ones living on nodes unrelated to the current operation.
5184     if level == locking.LEVEL_NODE and self.lock_instances:
5185       self.affected_instances = []
5186       if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5187         instances_keep = []
5188
5189         # Build list of instances to release
5190         locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5191         for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5192           if (instance.disk_template in constants.DTS_INT_MIRROR and
5193               self.op.node_name in instance.all_nodes):
5194             instances_keep.append(instance_name)
5195             self.affected_instances.append(instance)
5196
5197         _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5198
5199         assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5200                 set(instances_keep))
5201
5202   def BuildHooksEnv(self):
5203     """Build hooks env.
5204
5205     This runs on the master node.
5206
5207     """
5208     return {
5209       "OP_TARGET": self.op.node_name,
5210       "MASTER_CANDIDATE": str(self.op.master_candidate),
5211       "OFFLINE": str(self.op.offline),
5212       "DRAINED": str(self.op.drained),
5213       "MASTER_CAPABLE": str(self.op.master_capable),
5214       "VM_CAPABLE": str(self.op.vm_capable),
5215       }
5216
5217   def BuildHooksNodes(self):
5218     """Build hooks nodes.
5219
5220     """
5221     nl = [self.cfg.GetMasterNode(), self.op.node_name]
5222     return (nl, nl)
5223
5224   def CheckPrereq(self):
5225     """Check prerequisites.
5226
5227     This only checks the instance list against the existing names.
5228
5229     """
5230     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5231
5232     if (self.op.master_candidate is not None or
5233         self.op.drained is not None or
5234         self.op.offline is not None):
5235       # we can't change the master's node flags
5236       if self.op.node_name == self.cfg.GetMasterNode():
5237         raise errors.OpPrereqError("The master role can be changed"
5238                                    " only via master-failover",
5239                                    errors.ECODE_INVAL)
5240
5241     if self.op.master_candidate and not node.master_capable:
5242       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5243                                  " it a master candidate" % node.name,
5244                                  errors.ECODE_STATE)
5245
5246     if self.op.vm_capable == False:
5247       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5248       if ipri or isec:
5249         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5250                                    " the vm_capable flag" % node.name,
5251                                    errors.ECODE_STATE)
5252
5253     if node.master_candidate and self.might_demote and not self.lock_all:
5254       assert not self.op.auto_promote, "auto_promote set but lock_all not"
5255       # check if after removing the current node, we're missing master
5256       # candidates
5257       (mc_remaining, mc_should, _) = \
5258           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5259       if mc_remaining < mc_should:
5260         raise errors.OpPrereqError("Not enough master candidates, please"
5261                                    " pass auto promote option to allow"
5262                                    " promotion", errors.ECODE_STATE)
5263
5264     self.old_flags = old_flags = (node.master_candidate,
5265                                   node.drained, node.offline)
5266     assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5267     self.old_role = old_role = self._F2R[old_flags]
5268
5269     # Check for ineffective changes
5270     for attr in self._FLAGS:
5271       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5272         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5273         setattr(self.op, attr, None)
5274
5275     # Past this point, any flag change to False means a transition
5276     # away from the respective state, as only real changes are kept
5277
5278     # TODO: We might query the real power state if it supports OOB
5279     if _SupportsOob(self.cfg, node):
5280       if self.op.offline is False and not (node.powered or
5281                                            self.op.powered == True):
5282         raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5283                                     " offline status can be reset") %
5284                                    self.op.node_name)
5285     elif self.op.powered is not None:
5286       raise errors.OpPrereqError(("Unable to change powered state for node %s"
5287                                   " as it does not support out-of-band"
5288                                   " handling") % self.op.node_name)
5289
5290     # If we're being deofflined/drained, we'll MC ourself if needed
5291     if (self.op.drained == False or self.op.offline == False or
5292         (self.op.master_capable and not node.master_capable)):
5293       if _DecideSelfPromotion(self):
5294         self.op.master_candidate = True
5295         self.LogInfo("Auto-promoting node to master candidate")
5296
5297     # If we're no longer master capable, we'll demote ourselves from MC
5298     if self.op.master_capable == False and node.master_candidate:
5299       self.LogInfo("Demoting from master candidate")
5300       self.op.master_candidate = False
5301
5302     # Compute new role
5303     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5304     if self.op.master_candidate:
5305       new_role = self._ROLE_CANDIDATE
5306     elif self.op.drained:
5307       new_role = self._ROLE_DRAINED
5308     elif self.op.offline:
5309       new_role = self._ROLE_OFFLINE
5310     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5311       # False is still in new flags, which means we're un-setting (the
5312       # only) True flag
5313       new_role = self._ROLE_REGULAR
5314     else: # no new flags, nothing, keep old role
5315       new_role = old_role
5316
5317     self.new_role = new_role
5318
5319     if old_role == self._ROLE_OFFLINE and new_role != old_role:
5320       # Trying to transition out of offline status
5321       result = self.rpc.call_version([node.name])[node.name]
5322       if result.fail_msg:
5323         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5324                                    " to report its version: %s" %
5325                                    (node.name, result.fail_msg),
5326                                    errors.ECODE_STATE)
5327       else:
5328         self.LogWarning("Transitioning node from offline to online state"
5329                         " without using re-add. Please make sure the node"
5330                         " is healthy!")
5331
5332     if self.op.secondary_ip:
5333       # Ok even without locking, because this can't be changed by any LU
5334       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5335       master_singlehomed = master.secondary_ip == master.primary_ip
5336       if master_singlehomed and self.op.secondary_ip:
5337         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5338                                    " homed cluster", errors.ECODE_INVAL)
5339
5340       if node.offline:
5341         if self.affected_instances:
5342           raise errors.OpPrereqError("Cannot change secondary ip: offline"
5343                                      " node has instances (%s) configured"
5344                                      " to use it" % self.affected_instances)
5345       else:
5346         # On online nodes, check that no instances are running, and that
5347         # the node has the new ip and we can reach it.
5348         for instance in self.affected_instances:
5349           _CheckInstanceDown(self, instance, "cannot change secondary ip")
5350
5351         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5352         if master.name != node.name:
5353           # check reachability from master secondary ip to new secondary ip
5354           if not netutils.TcpPing(self.op.secondary_ip,
5355                                   constants.DEFAULT_NODED_PORT,
5356                                   source=master.secondary_ip):
5357             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5358                                        " based ping to node daemon port",
5359                                        errors.ECODE_ENVIRON)
5360
5361     if self.op.ndparams:
5362       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5363       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5364       self.new_ndparams = new_ndparams
5365
5366   def Exec(self, feedback_fn):
5367     """Modifies a node.
5368
5369     """
5370     node = self.node
5371     old_role = self.old_role
5372     new_role = self.new_role
5373
5374     result = []
5375
5376     if self.op.ndparams:
5377       node.ndparams = self.new_ndparams
5378
5379     if self.op.powered is not None:
5380       node.powered = self.op.powered
5381
5382     for attr in ["master_capable", "vm_capable"]:
5383       val = getattr(self.op, attr)
5384       if val is not None:
5385         setattr(node, attr, val)
5386         result.append((attr, str(val)))
5387
5388     if new_role != old_role:
5389       # Tell the node to demote itself, if no longer MC and not offline
5390       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5391         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5392         if msg:
5393           self.LogWarning("Node failed to demote itself: %s", msg)
5394
5395       new_flags = self._R2F[new_role]
5396       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5397         if of != nf:
5398           result.append((desc, str(nf)))
5399       (node.master_candidate, node.drained, node.offline) = new_flags
5400
5401       # we locked all nodes, we adjust the CP before updating this node
5402       if self.lock_all:
5403         _AdjustCandidatePool(self, [node.name])
5404
5405     if self.op.secondary_ip:
5406       node.secondary_ip = self.op.secondary_ip
5407       result.append(("secondary_ip", self.op.secondary_ip))
5408
5409     # this will trigger configuration file update, if needed
5410     self.cfg.Update(node, feedback_fn)
5411
5412     # this will trigger job queue propagation or cleanup if the mc
5413     # flag changed
5414     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5415       self.context.ReaddNode(node)
5416
5417     return result
5418
5419
5420 class LUNodePowercycle(NoHooksLU):
5421   """Powercycles a node.
5422
5423   """
5424   REQ_BGL = False
5425
5426   def CheckArguments(self):
5427     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5428     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5429       raise errors.OpPrereqError("The node is the master and the force"
5430                                  " parameter was not set",
5431                                  errors.ECODE_INVAL)
5432
5433   def ExpandNames(self):
5434     """Locking for PowercycleNode.
5435
5436     This is a last-resort option and shouldn't block on other
5437     jobs. Therefore, we grab no locks.
5438
5439     """
5440     self.needed_locks = {}
5441
5442   def Exec(self, feedback_fn):
5443     """Reboots a node.
5444
5445     """
5446     result = self.rpc.call_node_powercycle(self.op.node_name,
5447                                            self.cfg.GetHypervisorType())
5448     result.Raise("Failed to schedule the reboot")
5449     return result.payload
5450
5451
5452 class LUClusterQuery(NoHooksLU):
5453   """Query cluster configuration.
5454
5455   """
5456   REQ_BGL = False
5457
5458   def ExpandNames(self):
5459     self.needed_locks = {}
5460
5461   def Exec(self, feedback_fn):
5462     """Return cluster config.
5463
5464     """
5465     cluster = self.cfg.GetClusterInfo()
5466     os_hvp = {}
5467
5468     # Filter just for enabled hypervisors
5469     for os_name, hv_dict in cluster.os_hvp.items():
5470       os_hvp[os_name] = {}
5471       for hv_name, hv_params in hv_dict.items():
5472         if hv_name in cluster.enabled_hypervisors:
5473           os_hvp[os_name][hv_name] = hv_params
5474
5475     # Convert ip_family to ip_version
5476     primary_ip_version = constants.IP4_VERSION
5477     if cluster.primary_ip_family == netutils.IP6Address.family:
5478       primary_ip_version = constants.IP6_VERSION
5479
5480     result = {
5481       "software_version": constants.RELEASE_VERSION,
5482       "protocol_version": constants.PROTOCOL_VERSION,
5483       "config_version": constants.CONFIG_VERSION,
5484       "os_api_version": max(constants.OS_API_VERSIONS),
5485       "export_version": constants.EXPORT_VERSION,
5486       "architecture": (platform.architecture()[0], platform.machine()),
5487       "name": cluster.cluster_name,
5488       "master": cluster.master_node,
5489       "default_hypervisor": cluster.enabled_hypervisors[0],
5490       "enabled_hypervisors": cluster.enabled_hypervisors,
5491       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5492                         for hypervisor_name in cluster.enabled_hypervisors]),
5493       "os_hvp": os_hvp,
5494       "beparams": cluster.beparams,
5495       "osparams": cluster.osparams,
5496       "nicparams": cluster.nicparams,
5497       "ndparams": cluster.ndparams,
5498       "candidate_pool_size": cluster.candidate_pool_size,
5499       "master_netdev": cluster.master_netdev,
5500       "volume_group_name": cluster.volume_group_name,
5501       "drbd_usermode_helper": cluster.drbd_usermode_helper,
5502       "file_storage_dir": cluster.file_storage_dir,
5503       "shared_file_storage_dir": cluster.shared_file_storage_dir,
5504       "maintain_node_health": cluster.maintain_node_health,
5505       "ctime": cluster.ctime,
5506       "mtime": cluster.mtime,
5507       "uuid": cluster.uuid,
5508       "tags": list(cluster.GetTags()),
5509       "uid_pool": cluster.uid_pool,
5510       "default_iallocator": cluster.default_iallocator,
5511       "reserved_lvs": cluster.reserved_lvs,
5512       "primary_ip_version": primary_ip_version,
5513       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5514       "hidden_os": cluster.hidden_os,
5515       "blacklisted_os": cluster.blacklisted_os,
5516       }
5517
5518     return result
5519
5520
5521 class LUClusterConfigQuery(NoHooksLU):
5522   """Return configuration values.
5523
5524   """
5525   REQ_BGL = False
5526   _FIELDS_DYNAMIC = utils.FieldSet()
5527   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5528                                   "watcher_pause", "volume_group_name")
5529
5530   def CheckArguments(self):
5531     _CheckOutputFields(static=self._FIELDS_STATIC,
5532                        dynamic=self._FIELDS_DYNAMIC,
5533                        selected=self.op.output_fields)
5534
5535   def ExpandNames(self):
5536     self.needed_locks = {}
5537
5538   def Exec(self, feedback_fn):
5539     """Dump a representation of the cluster config to the standard output.
5540
5541     """
5542     values = []
5543     for field in self.op.output_fields:
5544       if field == "cluster_name":
5545         entry = self.cfg.GetClusterName()
5546       elif field == "master_node":
5547         entry = self.cfg.GetMasterNode()
5548       elif field == "drain_flag":
5549         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5550       elif field == "watcher_pause":
5551         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5552       elif field == "volume_group_name":
5553         entry = self.cfg.GetVGName()
5554       else:
5555         raise errors.ParameterError(field)
5556       values.append(entry)
5557     return values
5558
5559
5560 class LUInstanceActivateDisks(NoHooksLU):
5561   """Bring up an instance's disks.
5562
5563   """
5564   REQ_BGL = False
5565
5566   def ExpandNames(self):
5567     self._ExpandAndLockInstance()
5568     self.needed_locks[locking.LEVEL_NODE] = []
5569     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5570
5571   def DeclareLocks(self, level):
5572     if level == locking.LEVEL_NODE:
5573       self._LockInstancesNodes()
5574
5575   def CheckPrereq(self):
5576     """Check prerequisites.
5577
5578     This checks that the instance is in the cluster.
5579
5580     """
5581     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5582     assert self.instance is not None, \
5583       "Cannot retrieve locked instance %s" % self.op.instance_name
5584     _CheckNodeOnline(self, self.instance.primary_node)
5585
5586   def Exec(self, feedback_fn):
5587     """Activate the disks.
5588
5589     """
5590     disks_ok, disks_info = \
5591               _AssembleInstanceDisks(self, self.instance,
5592                                      ignore_size=self.op.ignore_size)
5593     if not disks_ok:
5594       raise errors.OpExecError("Cannot activate block devices")
5595
5596     return disks_info
5597
5598
5599 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5600                            ignore_size=False):
5601   """Prepare the block devices for an instance.
5602
5603   This sets up the block devices on all nodes.
5604
5605   @type lu: L{LogicalUnit}
5606   @param lu: the logical unit on whose behalf we execute
5607   @type instance: L{objects.Instance}
5608   @param instance: the instance for whose disks we assemble
5609   @type disks: list of L{objects.Disk} or None
5610   @param disks: which disks to assemble (or all, if None)
5611   @type ignore_secondaries: boolean
5612   @param ignore_secondaries: if true, errors on secondary nodes
5613       won't result in an error return from the function
5614   @type ignore_size: boolean
5615   @param ignore_size: if true, the current known size of the disk
5616       will not be used during the disk activation, useful for cases
5617       when the size is wrong
5618   @return: False if the operation failed, otherwise a list of
5619       (host, instance_visible_name, node_visible_name)
5620       with the mapping from node devices to instance devices
5621
5622   """
5623   device_info = []
5624   disks_ok = True
5625   iname = instance.name
5626   disks = _ExpandCheckDisks(instance, disks)
5627
5628   # With the two passes mechanism we try to reduce the window of
5629   # opportunity for the race condition of switching DRBD to primary
5630   # before handshaking occured, but we do not eliminate it
5631
5632   # The proper fix would be to wait (with some limits) until the
5633   # connection has been made and drbd transitions from WFConnection
5634   # into any other network-connected state (Connected, SyncTarget,
5635   # SyncSource, etc.)
5636
5637   # 1st pass, assemble on all nodes in secondary mode
5638   for idx, inst_disk in enumerate(disks):
5639     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5640       if ignore_size:
5641         node_disk = node_disk.Copy()
5642         node_disk.UnsetSize()
5643       lu.cfg.SetDiskID(node_disk, node)
5644       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5645       msg = result.fail_msg
5646       if msg:
5647         lu.proc.LogWarning("Could not prepare block device %s on node %s"
5648                            " (is_primary=False, pass=1): %s",
5649                            inst_disk.iv_name, node, msg)
5650         if not ignore_secondaries:
5651           disks_ok = False
5652
5653   # FIXME: race condition on drbd migration to primary
5654
5655   # 2nd pass, do only the primary node
5656   for idx, inst_disk in enumerate(disks):
5657     dev_path = None
5658
5659     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5660       if node != instance.primary_node:
5661         continue
5662       if ignore_size:
5663         node_disk = node_disk.Copy()
5664         node_disk.UnsetSize()
5665       lu.cfg.SetDiskID(node_disk, node)
5666       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5667       msg = result.fail_msg
5668       if msg:
5669         lu.proc.LogWarning("Could not prepare block device %s on node %s"
5670                            " (is_primary=True, pass=2): %s",
5671                            inst_disk.iv_name, node, msg)
5672         disks_ok = False
5673       else:
5674         dev_path = result.payload
5675
5676     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5677
5678   # leave the disks configured for the primary node
5679   # this is a workaround that would be fixed better by
5680   # improving the logical/physical id handling
5681   for disk in disks:
5682     lu.cfg.SetDiskID(disk, instance.primary_node)
5683
5684   return disks_ok, device_info
5685
5686
5687 def _StartInstanceDisks(lu, instance, force):
5688   """Start the disks of an instance.
5689
5690   """
5691   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5692                                            ignore_secondaries=force)
5693   if not disks_ok:
5694     _ShutdownInstanceDisks(lu, instance)
5695     if force is not None and not force:
5696       lu.proc.LogWarning("", hint="If the message above refers to a"
5697                          " secondary node,"
5698                          " you can retry the operation using '--force'.")
5699     raise errors.OpExecError("Disk consistency error")
5700
5701
5702 class LUInstanceDeactivateDisks(NoHooksLU):
5703   """Shutdown an instance's disks.
5704
5705   """
5706   REQ_BGL = False
5707
5708   def ExpandNames(self):
5709     self._ExpandAndLockInstance()
5710     self.needed_locks[locking.LEVEL_NODE] = []
5711     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5712
5713   def DeclareLocks(self, level):
5714     if level == locking.LEVEL_NODE:
5715       self._LockInstancesNodes()
5716
5717   def CheckPrereq(self):
5718     """Check prerequisites.
5719
5720     This checks that the instance is in the cluster.
5721
5722     """
5723     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5724     assert self.instance is not None, \
5725       "Cannot retrieve locked instance %s" % self.op.instance_name
5726
5727   def Exec(self, feedback_fn):
5728     """Deactivate the disks
5729
5730     """
5731     instance = self.instance
5732     if self.op.force:
5733       _ShutdownInstanceDisks(self, instance)
5734     else:
5735       _SafeShutdownInstanceDisks(self, instance)
5736
5737
5738 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5739   """Shutdown block devices of an instance.
5740
5741   This function checks if an instance is running, before calling
5742   _ShutdownInstanceDisks.
5743
5744   """
5745   _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5746   _ShutdownInstanceDisks(lu, instance, disks=disks)
5747
5748
5749 def _ExpandCheckDisks(instance, disks):
5750   """Return the instance disks selected by the disks list
5751
5752   @type disks: list of L{objects.Disk} or None
5753   @param disks: selected disks
5754   @rtype: list of L{objects.Disk}
5755   @return: selected instance disks to act on
5756
5757   """
5758   if disks is None:
5759     return instance.disks
5760   else:
5761     if not set(disks).issubset(instance.disks):
5762       raise errors.ProgrammerError("Can only act on disks belonging to the"
5763                                    " target instance")
5764     return disks
5765
5766
5767 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5768   """Shutdown block devices of an instance.
5769
5770   This does the shutdown on all nodes of the instance.
5771
5772   If the ignore_primary is false, errors on the primary node are
5773   ignored.
5774
5775   """
5776   all_result = True
5777   disks = _ExpandCheckDisks(instance, disks)
5778
5779   for disk in disks:
5780     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5781       lu.cfg.SetDiskID(top_disk, node)
5782       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5783       msg = result.fail_msg
5784       if msg:
5785         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5786                       disk.iv_name, node, msg)
5787         if ((node == instance.primary_node and not ignore_primary) or
5788             (node != instance.primary_node and not result.offline)):
5789           all_result = False
5790   return all_result
5791
5792
5793 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5794   """Checks if a node has enough free memory.
5795
5796   This function check if a given node has the needed amount of free
5797   memory. In case the node has less memory or we cannot get the
5798   information from the node, this function raise an OpPrereqError
5799   exception.
5800
5801   @type lu: C{LogicalUnit}
5802   @param lu: a logical unit from which we get configuration data
5803   @type node: C{str}
5804   @param node: the node to check
5805   @type reason: C{str}
5806   @param reason: string to use in the error message
5807   @type requested: C{int}
5808   @param requested: the amount of memory in MiB to check for
5809   @type hypervisor_name: C{str}
5810   @param hypervisor_name: the hypervisor to ask for memory stats
5811   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5812       we cannot check the node
5813
5814   """
5815   nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5816   nodeinfo[node].Raise("Can't get data from node %s" % node,
5817                        prereq=True, ecode=errors.ECODE_ENVIRON)
5818   free_mem = nodeinfo[node].payload.get("memory_free", None)
5819   if not isinstance(free_mem, int):
5820     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5821                                " was '%s'" % (node, free_mem),
5822                                errors.ECODE_ENVIRON)
5823   if requested > free_mem:
5824     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5825                                " needed %s MiB, available %s MiB" %
5826                                (node, reason, requested, free_mem),
5827                                errors.ECODE_NORES)
5828
5829
5830 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5831   """Checks if nodes have enough free disk space in the all VGs.
5832
5833   This function check if all given nodes have the needed amount of
5834   free disk. In case any node has less disk or we cannot get the
5835   information from the node, this function raise an OpPrereqError
5836   exception.
5837
5838   @type lu: C{LogicalUnit}
5839   @param lu: a logical unit from which we get configuration data
5840   @type nodenames: C{list}
5841   @param nodenames: the list of node names to check
5842   @type req_sizes: C{dict}
5843   @param req_sizes: the hash of vg and corresponding amount of disk in
5844       MiB to check for
5845   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5846       or we cannot check the node
5847
5848   """
5849   for vg, req_size in req_sizes.items():
5850     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5851
5852
5853 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5854   """Checks if nodes have enough free disk space in the specified VG.
5855
5856   This function check if all given nodes have the needed amount of
5857   free disk. In case any node has less disk or we cannot get the
5858   information from the node, this function raise an OpPrereqError
5859   exception.
5860
5861   @type lu: C{LogicalUnit}
5862   @param lu: a logical unit from which we get configuration data
5863   @type nodenames: C{list}
5864   @param nodenames: the list of node names to check
5865   @type vg: C{str}
5866   @param vg: the volume group to check
5867   @type requested: C{int}
5868   @param requested: the amount of disk in MiB to check for
5869   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5870       or we cannot check the node
5871
5872   """
5873   nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5874   for node in nodenames:
5875     info = nodeinfo[node]
5876     info.Raise("Cannot get current information from node %s" % node,
5877                prereq=True, ecode=errors.ECODE_ENVIRON)
5878     vg_free = info.payload.get("vg_free", None)
5879     if not isinstance(vg_free, int):
5880       raise errors.OpPrereqError("Can't compute free disk space on node"
5881                                  " %s for vg %s, result was '%s'" %
5882                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
5883     if requested > vg_free:
5884       raise errors.OpPrereqError("Not enough disk space on target node %s"
5885                                  " vg %s: required %d MiB, available %d MiB" %
5886                                  (node, vg, requested, vg_free),
5887                                  errors.ECODE_NORES)
5888
5889
5890 class LUInstanceStartup(LogicalUnit):
5891   """Starts an instance.
5892
5893   """
5894   HPATH = "instance-start"
5895   HTYPE = constants.HTYPE_INSTANCE
5896   REQ_BGL = False
5897
5898   def CheckArguments(self):
5899     # extra beparams
5900     if self.op.beparams:
5901       # fill the beparams dict
5902       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5903
5904   def ExpandNames(self):
5905     self._ExpandAndLockInstance()
5906
5907   def BuildHooksEnv(self):
5908     """Build hooks env.
5909
5910     This runs on master, primary and secondary nodes of the instance.
5911
5912     """
5913     env = {
5914       "FORCE": self.op.force,
5915       }
5916
5917     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5918
5919     return env
5920
5921   def BuildHooksNodes(self):
5922     """Build hooks nodes.
5923
5924     """
5925     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5926     return (nl, nl)
5927
5928   def CheckPrereq(self):
5929     """Check prerequisites.
5930
5931     This checks that the instance is in the cluster.
5932
5933     """
5934     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5935     assert self.instance is not None, \
5936       "Cannot retrieve locked instance %s" % self.op.instance_name
5937
5938     # extra hvparams
5939     if self.op.hvparams:
5940       # check hypervisor parameter syntax (locally)
5941       cluster = self.cfg.GetClusterInfo()
5942       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5943       filled_hvp = cluster.FillHV(instance)
5944       filled_hvp.update(self.op.hvparams)
5945       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5946       hv_type.CheckParameterSyntax(filled_hvp)
5947       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5948
5949     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5950
5951     if self.primary_offline and self.op.ignore_offline_nodes:
5952       self.proc.LogWarning("Ignoring offline primary node")
5953
5954       if self.op.hvparams or self.op.beparams:
5955         self.proc.LogWarning("Overridden parameters are ignored")
5956     else:
5957       _CheckNodeOnline(self, instance.primary_node)
5958
5959       bep = self.cfg.GetClusterInfo().FillBE(instance)
5960
5961       # check bridges existence
5962       _CheckInstanceBridgesExist(self, instance)
5963
5964       remote_info = self.rpc.call_instance_info(instance.primary_node,
5965                                                 instance.name,
5966                                                 instance.hypervisor)
5967       remote_info.Raise("Error checking node %s" % instance.primary_node,
5968                         prereq=True, ecode=errors.ECODE_ENVIRON)
5969       if not remote_info.payload: # not running already
5970         _CheckNodeFreeMemory(self, instance.primary_node,
5971                              "starting instance %s" % instance.name,
5972                              bep[constants.BE_MEMORY], instance.hypervisor)
5973
5974   def Exec(self, feedback_fn):
5975     """Start the instance.
5976
5977     """
5978     instance = self.instance
5979     force = self.op.force
5980
5981     if not self.op.no_remember:
5982       self.cfg.MarkInstanceUp(instance.name)
5983
5984     if self.primary_offline:
5985       assert self.op.ignore_offline_nodes
5986       self.proc.LogInfo("Primary node offline, marked instance as started")
5987     else:
5988       node_current = instance.primary_node
5989
5990       _StartInstanceDisks(self, instance, force)
5991
5992       result = self.rpc.call_instance_start(node_current, instance,
5993                                             self.op.hvparams, self.op.beparams,
5994                                             self.op.startup_paused)
5995       msg = result.fail_msg
5996       if msg:
5997         _ShutdownInstanceDisks(self, instance)
5998         raise errors.OpExecError("Could not start instance: %s" % msg)
5999
6000
6001 class LUInstanceReboot(LogicalUnit):
6002   """Reboot an instance.
6003
6004   """
6005   HPATH = "instance-reboot"
6006   HTYPE = constants.HTYPE_INSTANCE
6007   REQ_BGL = False
6008
6009   def ExpandNames(self):
6010     self._ExpandAndLockInstance()
6011
6012   def BuildHooksEnv(self):
6013     """Build hooks env.
6014
6015     This runs on master, primary and secondary nodes of the instance.
6016
6017     """
6018     env = {
6019       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6020       "REBOOT_TYPE": self.op.reboot_type,
6021       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6022       }
6023
6024     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6025
6026     return env
6027
6028   def BuildHooksNodes(self):
6029     """Build hooks nodes.
6030
6031     """
6032     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6033     return (nl, nl)
6034
6035   def CheckPrereq(self):
6036     """Check prerequisites.
6037
6038     This checks that the instance is in the cluster.
6039
6040     """
6041     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6042     assert self.instance is not None, \
6043       "Cannot retrieve locked instance %s" % self.op.instance_name
6044
6045     _CheckNodeOnline(self, instance.primary_node)
6046
6047     # check bridges existence
6048     _CheckInstanceBridgesExist(self, instance)
6049
6050   def Exec(self, feedback_fn):
6051     """Reboot the instance.
6052
6053     """
6054     instance = self.instance
6055     ignore_secondaries = self.op.ignore_secondaries
6056     reboot_type = self.op.reboot_type
6057
6058     remote_info = self.rpc.call_instance_info(instance.primary_node,
6059                                               instance.name,
6060                                               instance.hypervisor)
6061     remote_info.Raise("Error checking node %s" % instance.primary_node)
6062     instance_running = bool(remote_info.payload)
6063
6064     node_current = instance.primary_node
6065
6066     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6067                                             constants.INSTANCE_REBOOT_HARD]:
6068       for disk in instance.disks:
6069         self.cfg.SetDiskID(disk, node_current)
6070       result = self.rpc.call_instance_reboot(node_current, instance,
6071                                              reboot_type,
6072                                              self.op.shutdown_timeout)
6073       result.Raise("Could not reboot instance")
6074     else:
6075       if instance_running:
6076         result = self.rpc.call_instance_shutdown(node_current, instance,
6077                                                  self.op.shutdown_timeout)
6078         result.Raise("Could not shutdown instance for full reboot")
6079         _ShutdownInstanceDisks(self, instance)
6080       else:
6081         self.LogInfo("Instance %s was already stopped, starting now",
6082                      instance.name)
6083       _StartInstanceDisks(self, instance, ignore_secondaries)
6084       result = self.rpc.call_instance_start(node_current, instance,
6085                                             None, None, False)
6086       msg = result.fail_msg
6087       if msg:
6088         _ShutdownInstanceDisks(self, instance)
6089         raise errors.OpExecError("Could not start instance for"
6090                                  " full reboot: %s" % msg)
6091
6092     self.cfg.MarkInstanceUp(instance.name)
6093
6094
6095 class LUInstanceShutdown(LogicalUnit):
6096   """Shutdown an instance.
6097
6098   """
6099   HPATH = "instance-stop"
6100   HTYPE = constants.HTYPE_INSTANCE
6101   REQ_BGL = False
6102
6103   def ExpandNames(self):
6104     self._ExpandAndLockInstance()
6105
6106   def BuildHooksEnv(self):
6107     """Build hooks env.
6108
6109     This runs on master, primary and secondary nodes of the instance.
6110
6111     """
6112     env = _BuildInstanceHookEnvByObject(self, self.instance)
6113     env["TIMEOUT"] = self.op.timeout
6114     return env
6115
6116   def BuildHooksNodes(self):
6117     """Build hooks nodes.
6118
6119     """
6120     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6121     return (nl, nl)
6122
6123   def CheckPrereq(self):
6124     """Check prerequisites.
6125
6126     This checks that the instance is in the cluster.
6127
6128     """
6129     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6130     assert self.instance is not None, \
6131       "Cannot retrieve locked instance %s" % self.op.instance_name
6132
6133     self.primary_offline = \
6134       self.cfg.GetNodeInfo(self.instance.primary_node).offline
6135
6136     if self.primary_offline and self.op.ignore_offline_nodes:
6137       self.proc.LogWarning("Ignoring offline primary node")
6138     else:
6139       _CheckNodeOnline(self, self.instance.primary_node)
6140
6141   def Exec(self, feedback_fn):
6142     """Shutdown the instance.
6143
6144     """
6145     instance = self.instance
6146     node_current = instance.primary_node
6147     timeout = self.op.timeout
6148
6149     if not self.op.no_remember:
6150       self.cfg.MarkInstanceDown(instance.name)
6151
6152     if self.primary_offline:
6153       assert self.op.ignore_offline_nodes
6154       self.proc.LogInfo("Primary node offline, marked instance as stopped")
6155     else:
6156       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6157       msg = result.fail_msg
6158       if msg:
6159         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6160
6161       _ShutdownInstanceDisks(self, instance)
6162
6163
6164 class LUInstanceReinstall(LogicalUnit):
6165   """Reinstall an instance.
6166
6167   """
6168   HPATH = "instance-reinstall"
6169   HTYPE = constants.HTYPE_INSTANCE
6170   REQ_BGL = False
6171
6172   def ExpandNames(self):
6173     self._ExpandAndLockInstance()
6174
6175   def BuildHooksEnv(self):
6176     """Build hooks env.
6177
6178     This runs on master, primary and secondary nodes of the instance.
6179
6180     """
6181     return _BuildInstanceHookEnvByObject(self, self.instance)
6182
6183   def BuildHooksNodes(self):
6184     """Build hooks nodes.
6185
6186     """
6187     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6188     return (nl, nl)
6189
6190   def CheckPrereq(self):
6191     """Check prerequisites.
6192
6193     This checks that the instance is in the cluster and is not running.
6194
6195     """
6196     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6197     assert instance is not None, \
6198       "Cannot retrieve locked instance %s" % self.op.instance_name
6199     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6200                      " offline, cannot reinstall")
6201     for node in instance.secondary_nodes:
6202       _CheckNodeOnline(self, node, "Instance secondary node offline,"
6203                        " cannot reinstall")
6204
6205     if instance.disk_template == constants.DT_DISKLESS:
6206       raise errors.OpPrereqError("Instance '%s' has no disks" %
6207                                  self.op.instance_name,
6208                                  errors.ECODE_INVAL)
6209     _CheckInstanceDown(self, instance, "cannot reinstall")
6210
6211     if self.op.os_type is not None:
6212       # OS verification
6213       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6214       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6215       instance_os = self.op.os_type
6216     else:
6217       instance_os = instance.os
6218
6219     nodelist = list(instance.all_nodes)
6220
6221     if self.op.osparams:
6222       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6223       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6224       self.os_inst = i_osdict # the new dict (without defaults)
6225     else:
6226       self.os_inst = None
6227
6228     self.instance = instance
6229
6230   def Exec(self, feedback_fn):
6231     """Reinstall the instance.
6232
6233     """
6234     inst = self.instance
6235
6236     if self.op.os_type is not None:
6237       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6238       inst.os = self.op.os_type
6239       # Write to configuration
6240       self.cfg.Update(inst, feedback_fn)
6241
6242     _StartInstanceDisks(self, inst, None)
6243     try:
6244       feedback_fn("Running the instance OS create scripts...")
6245       # FIXME: pass debug option from opcode to backend
6246       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6247                                              self.op.debug_level,
6248                                              osparams=self.os_inst)
6249       result.Raise("Could not install OS for instance %s on node %s" %
6250                    (inst.name, inst.primary_node))
6251     finally:
6252       _ShutdownInstanceDisks(self, inst)
6253
6254
6255 class LUInstanceRecreateDisks(LogicalUnit):
6256   """Recreate an instance's missing disks.
6257
6258   """
6259   HPATH = "instance-recreate-disks"
6260   HTYPE = constants.HTYPE_INSTANCE
6261   REQ_BGL = False
6262
6263   def CheckArguments(self):
6264     # normalise the disk list
6265     self.op.disks = sorted(frozenset(self.op.disks))
6266
6267   def ExpandNames(self):
6268     self._ExpandAndLockInstance()
6269     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6270     if self.op.nodes:
6271       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6272       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6273     else:
6274       self.needed_locks[locking.LEVEL_NODE] = []
6275
6276   def DeclareLocks(self, level):
6277     if level == locking.LEVEL_NODE:
6278       # if we replace the nodes, we only need to lock the old primary,
6279       # otherwise we need to lock all nodes for disk re-creation
6280       primary_only = bool(self.op.nodes)
6281       self._LockInstancesNodes(primary_only=primary_only)
6282
6283   def BuildHooksEnv(self):
6284     """Build hooks env.
6285
6286     This runs on master, primary and secondary nodes of the instance.
6287
6288     """
6289     return _BuildInstanceHookEnvByObject(self, self.instance)
6290
6291   def BuildHooksNodes(self):
6292     """Build hooks nodes.
6293
6294     """
6295     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6296     return (nl, nl)
6297
6298   def CheckPrereq(self):
6299     """Check prerequisites.
6300
6301     This checks that the instance is in the cluster and is not running.
6302
6303     """
6304     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6305     assert instance is not None, \
6306       "Cannot retrieve locked instance %s" % self.op.instance_name
6307     if self.op.nodes:
6308       if len(self.op.nodes) != len(instance.all_nodes):
6309         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6310                                    " %d replacement nodes were specified" %
6311                                    (instance.name, len(instance.all_nodes),
6312                                     len(self.op.nodes)),
6313                                    errors.ECODE_INVAL)
6314       assert instance.disk_template != constants.DT_DRBD8 or \
6315           len(self.op.nodes) == 2
6316       assert instance.disk_template != constants.DT_PLAIN or \
6317           len(self.op.nodes) == 1
6318       primary_node = self.op.nodes[0]
6319     else:
6320       primary_node = instance.primary_node
6321     _CheckNodeOnline(self, primary_node)
6322
6323     if instance.disk_template == constants.DT_DISKLESS:
6324       raise errors.OpPrereqError("Instance '%s' has no disks" %
6325                                  self.op.instance_name, errors.ECODE_INVAL)
6326     # if we replace nodes *and* the old primary is offline, we don't
6327     # check
6328     assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6329     old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6330     if not (self.op.nodes and old_pnode.offline):
6331       _CheckInstanceDown(self, instance, "cannot recreate disks")
6332
6333     if not self.op.disks:
6334       self.op.disks = range(len(instance.disks))
6335     else:
6336       for idx in self.op.disks:
6337         if idx >= len(instance.disks):
6338           raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6339                                      errors.ECODE_INVAL)
6340     if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6341       raise errors.OpPrereqError("Can't recreate disks partially and"
6342                                  " change the nodes at the same time",
6343                                  errors.ECODE_INVAL)
6344     self.instance = instance
6345
6346   def Exec(self, feedback_fn):
6347     """Recreate the disks.
6348
6349     """
6350     instance = self.instance
6351
6352     to_skip = []
6353     mods = [] # keeps track of needed logical_id changes
6354
6355     for idx, disk in enumerate(instance.disks):
6356       if idx not in self.op.disks: # disk idx has not been passed in
6357         to_skip.append(idx)
6358         continue
6359       # update secondaries for disks, if needed
6360       if self.op.nodes:
6361         if disk.dev_type == constants.LD_DRBD8:
6362           # need to update the nodes and minors
6363           assert len(self.op.nodes) == 2
6364           assert len(disk.logical_id) == 6 # otherwise disk internals
6365                                            # have changed
6366           (_, _, old_port, _, _, old_secret) = disk.logical_id
6367           new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6368           new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6369                     new_minors[0], new_minors[1], old_secret)
6370           assert len(disk.logical_id) == len(new_id)
6371           mods.append((idx, new_id))
6372
6373     # now that we have passed all asserts above, we can apply the mods
6374     # in a single run (to avoid partial changes)
6375     for idx, new_id in mods:
6376       instance.disks[idx].logical_id = new_id
6377
6378     # change primary node, if needed
6379     if self.op.nodes:
6380       instance.primary_node = self.op.nodes[0]
6381       self.LogWarning("Changing the instance's nodes, you will have to"
6382                       " remove any disks left on the older nodes manually")
6383
6384     if self.op.nodes:
6385       self.cfg.Update(instance, feedback_fn)
6386
6387     _CreateDisks(self, instance, to_skip=to_skip)
6388
6389
6390 class LUInstanceRename(LogicalUnit):
6391   """Rename an instance.
6392
6393   """
6394   HPATH = "instance-rename"
6395   HTYPE = constants.HTYPE_INSTANCE
6396
6397   def CheckArguments(self):
6398     """Check arguments.
6399
6400     """
6401     if self.op.ip_check and not self.op.name_check:
6402       # TODO: make the ip check more flexible and not depend on the name check
6403       raise errors.OpPrereqError("IP address check requires a name check",
6404                                  errors.ECODE_INVAL)
6405
6406   def BuildHooksEnv(self):
6407     """Build hooks env.
6408
6409     This runs on master, primary and secondary nodes of the instance.
6410
6411     """
6412     env = _BuildInstanceHookEnvByObject(self, self.instance)
6413     env["INSTANCE_NEW_NAME"] = self.op.new_name
6414     return env
6415
6416   def BuildHooksNodes(self):
6417     """Build hooks nodes.
6418
6419     """
6420     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6421     return (nl, nl)
6422
6423   def CheckPrereq(self):
6424     """Check prerequisites.
6425
6426     This checks that the instance is in the cluster and is not running.
6427
6428     """
6429     self.op.instance_name = _ExpandInstanceName(self.cfg,
6430                                                 self.op.instance_name)
6431     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6432     assert instance is not None
6433     _CheckNodeOnline(self, instance.primary_node)
6434     _CheckInstanceDown(self, instance, "cannot rename")
6435     self.instance = instance
6436
6437     new_name = self.op.new_name
6438     if self.op.name_check:
6439       hostname = netutils.GetHostname(name=new_name)
6440       if hostname.name != new_name:
6441         self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6442                      hostname.name)
6443       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6444         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6445                                     " same as given hostname '%s'") %
6446                                     (hostname.name, self.op.new_name),
6447                                     errors.ECODE_INVAL)
6448       new_name = self.op.new_name = hostname.name
6449       if (self.op.ip_check and
6450           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6451         raise errors.OpPrereqError("IP %s of instance %s already in use" %
6452                                    (hostname.ip, new_name),
6453                                    errors.ECODE_NOTUNIQUE)
6454
6455     instance_list = self.cfg.GetInstanceList()
6456     if new_name in instance_list and new_name != instance.name:
6457       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6458                                  new_name, errors.ECODE_EXISTS)
6459
6460   def Exec(self, feedback_fn):
6461     """Rename the instance.
6462
6463     """
6464     inst = self.instance
6465     old_name = inst.name
6466
6467     rename_file_storage = False
6468     if (inst.disk_template in constants.DTS_FILEBASED and
6469         self.op.new_name != inst.name):
6470       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6471       rename_file_storage = True
6472
6473     self.cfg.RenameInstance(inst.name, self.op.new_name)
6474     # Change the instance lock. This is definitely safe while we hold the BGL.
6475     # Otherwise the new lock would have to be added in acquired mode.
6476     assert self.REQ_BGL
6477     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6478     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6479
6480     # re-read the instance from the configuration after rename
6481     inst = self.cfg.GetInstanceInfo(self.op.new_name)
6482
6483     if rename_file_storage:
6484       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6485       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6486                                                      old_file_storage_dir,
6487                                                      new_file_storage_dir)
6488       result.Raise("Could not rename on node %s directory '%s' to '%s'"
6489                    " (but the instance has been renamed in Ganeti)" %
6490                    (inst.primary_node, old_file_storage_dir,
6491                     new_file_storage_dir))
6492
6493     _StartInstanceDisks(self, inst, None)
6494     try:
6495       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6496                                                  old_name, self.op.debug_level)
6497       msg = result.fail_msg
6498       if msg:
6499         msg = ("Could not run OS rename script for instance %s on node %s"
6500                " (but the instance has been renamed in Ganeti): %s" %
6501                (inst.name, inst.primary_node, msg))
6502         self.proc.LogWarning(msg)
6503     finally:
6504       _ShutdownInstanceDisks(self, inst)
6505
6506     return inst.name
6507
6508
6509 class LUInstanceRemove(LogicalUnit):
6510   """Remove an instance.
6511
6512   """
6513   HPATH = "instance-remove"
6514   HTYPE = constants.HTYPE_INSTANCE
6515   REQ_BGL = False
6516
6517   def ExpandNames(self):
6518     self._ExpandAndLockInstance()
6519     self.needed_locks[locking.LEVEL_NODE] = []
6520     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6521
6522   def DeclareLocks(self, level):
6523     if level == locking.LEVEL_NODE:
6524       self._LockInstancesNodes()
6525
6526   def BuildHooksEnv(self):
6527     """Build hooks env.
6528
6529     This runs on master, primary and secondary nodes of the instance.
6530
6531     """
6532     env = _BuildInstanceHookEnvByObject(self, self.instance)
6533     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6534     return env
6535
6536   def BuildHooksNodes(self):
6537     """Build hooks nodes.
6538
6539     """
6540     nl = [self.cfg.GetMasterNode()]
6541     nl_post = list(self.instance.all_nodes) + nl
6542     return (nl, nl_post)
6543
6544   def CheckPrereq(self):
6545     """Check prerequisites.
6546
6547     This checks that the instance is in the cluster.
6548
6549     """
6550     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6551     assert self.instance is not None, \
6552       "Cannot retrieve locked instance %s" % self.op.instance_name
6553
6554   def Exec(self, feedback_fn):
6555     """Remove the instance.
6556
6557     """
6558     instance = self.instance
6559     logging.info("Shutting down instance %s on node %s",
6560                  instance.name, instance.primary_node)
6561
6562     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6563                                              self.op.shutdown_timeout)
6564     msg = result.fail_msg
6565     if msg:
6566       if self.op.ignore_failures:
6567         feedback_fn("Warning: can't shutdown instance: %s" % msg)
6568       else:
6569         raise errors.OpExecError("Could not shutdown instance %s on"
6570                                  " node %s: %s" %
6571                                  (instance.name, instance.primary_node, msg))
6572
6573     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6574
6575
6576 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6577   """Utility function to remove an instance.
6578
6579   """
6580   logging.info("Removing block devices for instance %s", instance.name)
6581
6582   if not _RemoveDisks(lu, instance):
6583     if not ignore_failures:
6584       raise errors.OpExecError("Can't remove instance's disks")
6585     feedback_fn("Warning: can't remove instance's disks")
6586
6587   logging.info("Removing instance %s out of cluster config", instance.name)
6588
6589   lu.cfg.RemoveInstance(instance.name)
6590
6591   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6592     "Instance lock removal conflict"
6593
6594   # Remove lock for the instance
6595   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6596
6597
6598 class LUInstanceQuery(NoHooksLU):
6599   """Logical unit for querying instances.
6600
6601   """
6602   # pylint: disable=W0142
6603   REQ_BGL = False
6604
6605   def CheckArguments(self):
6606     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6607                              self.op.output_fields, self.op.use_locking)
6608
6609   def ExpandNames(self):
6610     self.iq.ExpandNames(self)
6611
6612   def DeclareLocks(self, level):
6613     self.iq.DeclareLocks(self, level)
6614
6615   def Exec(self, feedback_fn):
6616     return self.iq.OldStyleQuery(self)
6617
6618
6619 class LUInstanceFailover(LogicalUnit):
6620   """Failover an instance.
6621
6622   """
6623   HPATH = "instance-failover"
6624   HTYPE = constants.HTYPE_INSTANCE
6625   REQ_BGL = False
6626
6627   def CheckArguments(self):
6628     """Check the arguments.
6629
6630     """
6631     self.iallocator = getattr(self.op, "iallocator", None)
6632     self.target_node = getattr(self.op, "target_node", None)
6633
6634   def ExpandNames(self):
6635     self._ExpandAndLockInstance()
6636
6637     if self.op.target_node is not None:
6638       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6639
6640     self.needed_locks[locking.LEVEL_NODE] = []
6641     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6642
6643     ignore_consistency = self.op.ignore_consistency
6644     shutdown_timeout = self.op.shutdown_timeout
6645     self._migrater = TLMigrateInstance(self, self.op.instance_name,
6646                                        cleanup=False,
6647                                        failover=True,
6648                                        ignore_consistency=ignore_consistency,
6649                                        shutdown_timeout=shutdown_timeout)
6650     self.tasklets = [self._migrater]
6651
6652   def DeclareLocks(self, level):
6653     if level == locking.LEVEL_NODE:
6654       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6655       if instance.disk_template in constants.DTS_EXT_MIRROR:
6656         if self.op.target_node is None:
6657           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6658         else:
6659           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6660                                                    self.op.target_node]
6661         del self.recalculate_locks[locking.LEVEL_NODE]
6662       else:
6663         self._LockInstancesNodes()
6664
6665   def BuildHooksEnv(self):
6666     """Build hooks env.
6667
6668     This runs on master, primary and secondary nodes of the instance.
6669
6670     """
6671     instance = self._migrater.instance
6672     source_node = instance.primary_node
6673     target_node = self.op.target_node
6674     env = {
6675       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6676       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6677       "OLD_PRIMARY": source_node,
6678       "NEW_PRIMARY": target_node,
6679       }
6680
6681     if instance.disk_template in constants.DTS_INT_MIRROR:
6682       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6683       env["NEW_SECONDARY"] = source_node
6684     else:
6685       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6686
6687     env.update(_BuildInstanceHookEnvByObject(self, instance))
6688
6689     return env
6690
6691   def BuildHooksNodes(self):
6692     """Build hooks nodes.
6693
6694     """
6695     instance = self._migrater.instance
6696     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6697     return (nl, nl + [instance.primary_node])
6698
6699
6700 class LUInstanceMigrate(LogicalUnit):
6701   """Migrate an instance.
6702
6703   This is migration without shutting down, compared to the failover,
6704   which is done with shutdown.
6705
6706   """
6707   HPATH = "instance-migrate"
6708   HTYPE = constants.HTYPE_INSTANCE
6709   REQ_BGL = False
6710
6711   def ExpandNames(self):
6712     self._ExpandAndLockInstance()
6713
6714     if self.op.target_node is not None:
6715       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6716
6717     self.needed_locks[locking.LEVEL_NODE] = []
6718     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6719
6720     self._migrater = TLMigrateInstance(self, self.op.instance_name,
6721                                        cleanup=self.op.cleanup,
6722                                        failover=False,
6723                                        fallback=self.op.allow_failover)
6724     self.tasklets = [self._migrater]
6725
6726   def DeclareLocks(self, level):
6727     if level == locking.LEVEL_NODE:
6728       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6729       if instance.disk_template in constants.DTS_EXT_MIRROR:
6730         if self.op.target_node is None:
6731           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6732         else:
6733           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6734                                                    self.op.target_node]
6735         del self.recalculate_locks[locking.LEVEL_NODE]
6736       else:
6737         self._LockInstancesNodes()
6738
6739   def BuildHooksEnv(self):
6740     """Build hooks env.
6741
6742     This runs on master, primary and secondary nodes of the instance.
6743
6744     """
6745     instance = self._migrater.instance
6746     source_node = instance.primary_node
6747     target_node = self.op.target_node
6748     env = _BuildInstanceHookEnvByObject(self, instance)
6749     env.update({
6750       "MIGRATE_LIVE": self._migrater.live,
6751       "MIGRATE_CLEANUP": self.op.cleanup,
6752       "OLD_PRIMARY": source_node,
6753       "NEW_PRIMARY": target_node,
6754       })
6755
6756     if instance.disk_template in constants.DTS_INT_MIRROR:
6757       env["OLD_SECONDARY"] = target_node
6758       env["NEW_SECONDARY"] = source_node
6759     else:
6760       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6761
6762     return env
6763
6764   def BuildHooksNodes(self):
6765     """Build hooks nodes.
6766
6767     """
6768     instance = self._migrater.instance
6769     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6770     return (nl, nl + [instance.primary_node])
6771
6772
6773 class LUInstanceMove(LogicalUnit):
6774   """Move an instance by data-copying.
6775
6776   """
6777   HPATH = "instance-move"
6778   HTYPE = constants.HTYPE_INSTANCE
6779   REQ_BGL = False
6780
6781   def ExpandNames(self):
6782     self._ExpandAndLockInstance()
6783     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6784     self.op.target_node = target_node
6785     self.needed_locks[locking.LEVEL_NODE] = [target_node]
6786     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6787
6788   def DeclareLocks(self, level):
6789     if level == locking.LEVEL_NODE:
6790       self._LockInstancesNodes(primary_only=True)
6791
6792   def BuildHooksEnv(self):
6793     """Build hooks env.
6794
6795     This runs on master, primary and secondary nodes of the instance.
6796
6797     """
6798     env = {
6799       "TARGET_NODE": self.op.target_node,
6800       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6801       }
6802     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6803     return env
6804
6805   def BuildHooksNodes(self):
6806     """Build hooks nodes.
6807
6808     """
6809     nl = [
6810       self.cfg.GetMasterNode(),
6811       self.instance.primary_node,
6812       self.op.target_node,
6813       ]
6814     return (nl, nl)
6815
6816   def CheckPrereq(self):
6817     """Check prerequisites.
6818
6819     This checks that the instance is in the cluster.
6820
6821     """
6822     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6823     assert self.instance is not None, \
6824       "Cannot retrieve locked instance %s" % self.op.instance_name
6825
6826     node = self.cfg.GetNodeInfo(self.op.target_node)
6827     assert node is not None, \
6828       "Cannot retrieve locked node %s" % self.op.target_node
6829
6830     self.target_node = target_node = node.name
6831
6832     if target_node == instance.primary_node:
6833       raise errors.OpPrereqError("Instance %s is already on the node %s" %
6834                                  (instance.name, target_node),
6835                                  errors.ECODE_STATE)
6836
6837     bep = self.cfg.GetClusterInfo().FillBE(instance)
6838
6839     for idx, dsk in enumerate(instance.disks):
6840       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6841         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6842                                    " cannot copy" % idx, errors.ECODE_STATE)
6843
6844     _CheckNodeOnline(self, target_node)
6845     _CheckNodeNotDrained(self, target_node)
6846     _CheckNodeVmCapable(self, target_node)
6847
6848     if instance.admin_up:
6849       # check memory requirements on the secondary node
6850       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6851                            instance.name, bep[constants.BE_MEMORY],
6852                            instance.hypervisor)
6853     else:
6854       self.LogInfo("Not checking memory on the secondary node as"
6855                    " instance will not be started")
6856
6857     # check bridge existance
6858     _CheckInstanceBridgesExist(self, instance, node=target_node)
6859
6860   def Exec(self, feedback_fn):
6861     """Move an instance.
6862
6863     The move is done by shutting it down on its present node, copying
6864     the data over (slow) and starting it on the new node.
6865
6866     """
6867     instance = self.instance
6868
6869     source_node = instance.primary_node
6870     target_node = self.target_node
6871
6872     self.LogInfo("Shutting down instance %s on source node %s",
6873                  instance.name, source_node)
6874
6875     result = self.rpc.call_instance_shutdown(source_node, instance,
6876                                              self.op.shutdown_timeout)
6877     msg = result.fail_msg
6878     if msg:
6879       if self.op.ignore_consistency:
6880         self.proc.LogWarning("Could not shutdown instance %s on node %s."
6881                              " Proceeding anyway. Please make sure node"
6882                              " %s is down. Error details: %s",
6883                              instance.name, source_node, source_node, msg)
6884       else:
6885         raise errors.OpExecError("Could not shutdown instance %s on"
6886                                  " node %s: %s" %
6887                                  (instance.name, source_node, msg))
6888
6889     # create the target disks
6890     try:
6891       _CreateDisks(self, instance, target_node=target_node)
6892     except errors.OpExecError:
6893       self.LogWarning("Device creation failed, reverting...")
6894       try:
6895         _RemoveDisks(self, instance, target_node=target_node)
6896       finally:
6897         self.cfg.ReleaseDRBDMinors(instance.name)
6898         raise
6899
6900     cluster_name = self.cfg.GetClusterInfo().cluster_name
6901
6902     errs = []
6903     # activate, get path, copy the data over
6904     for idx, disk in enumerate(instance.disks):
6905       self.LogInfo("Copying data for disk %d", idx)
6906       result = self.rpc.call_blockdev_assemble(target_node, disk,
6907                                                instance.name, True, idx)
6908       if result.fail_msg:
6909         self.LogWarning("Can't assemble newly created disk %d: %s",
6910                         idx, result.fail_msg)
6911         errs.append(result.fail_msg)
6912         break
6913       dev_path = result.payload
6914       result = self.rpc.call_blockdev_export(source_node, disk,
6915                                              target_node, dev_path,
6916                                              cluster_name)
6917       if result.fail_msg:
6918         self.LogWarning("Can't copy data over for disk %d: %s",
6919                         idx, result.fail_msg)
6920         errs.append(result.fail_msg)
6921         break
6922
6923     if errs:
6924       self.LogWarning("Some disks failed to copy, aborting")
6925       try:
6926         _RemoveDisks(self, instance, target_node=target_node)
6927       finally:
6928         self.cfg.ReleaseDRBDMinors(instance.name)
6929         raise errors.OpExecError("Errors during disk copy: %s" %
6930                                  (",".join(errs),))
6931
6932     instance.primary_node = target_node
6933     self.cfg.Update(instance, feedback_fn)
6934
6935     self.LogInfo("Removing the disks on the original node")
6936     _RemoveDisks(self, instance, target_node=source_node)
6937
6938     # Only start the instance if it's marked as up
6939     if instance.admin_up:
6940       self.LogInfo("Starting instance %s on node %s",
6941                    instance.name, target_node)
6942
6943       disks_ok, _ = _AssembleInstanceDisks(self, instance,
6944                                            ignore_secondaries=True)
6945       if not disks_ok:
6946         _ShutdownInstanceDisks(self, instance)
6947         raise errors.OpExecError("Can't activate the instance's disks")
6948
6949       result = self.rpc.call_instance_start(target_node, instance,
6950                                             None, None, False)
6951       msg = result.fail_msg
6952       if msg:
6953         _ShutdownInstanceDisks(self, instance)
6954         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6955                                  (instance.name, target_node, msg))
6956
6957
6958 class LUNodeMigrate(LogicalUnit):
6959   """Migrate all instances from a node.
6960
6961   """
6962   HPATH = "node-migrate"
6963   HTYPE = constants.HTYPE_NODE
6964   REQ_BGL = False
6965
6966   def CheckArguments(self):
6967     pass
6968
6969   def ExpandNames(self):
6970     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6971
6972     self.share_locks = _ShareAll()
6973     self.needed_locks = {
6974       locking.LEVEL_NODE: [self.op.node_name],
6975       }
6976
6977   def BuildHooksEnv(self):
6978     """Build hooks env.
6979
6980     This runs on the master, the primary and all the secondaries.
6981
6982     """
6983     return {
6984       "NODE_NAME": self.op.node_name,
6985       }
6986
6987   def BuildHooksNodes(self):
6988     """Build hooks nodes.
6989
6990     """
6991     nl = [self.cfg.GetMasterNode()]
6992     return (nl, nl)
6993
6994   def CheckPrereq(self):
6995     pass
6996
6997   def Exec(self, feedback_fn):
6998     # Prepare jobs for migration instances
6999     jobs = [
7000       [opcodes.OpInstanceMigrate(instance_name=inst.name,
7001                                  mode=self.op.mode,
7002                                  live=self.op.live,
7003                                  iallocator=self.op.iallocator,
7004                                  target_node=self.op.target_node)]
7005       for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7006       ]
7007
7008     # TODO: Run iallocator in this opcode and pass correct placement options to
7009     # OpInstanceMigrate. Since other jobs can modify the cluster between
7010     # running the iallocator and the actual migration, a good consistency model
7011     # will have to be found.
7012
7013     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7014             frozenset([self.op.node_name]))
7015
7016     return ResultWithJobs(jobs)
7017
7018
7019 class TLMigrateInstance(Tasklet):
7020   """Tasklet class for instance migration.
7021
7022   @type live: boolean
7023   @ivar live: whether the migration will be done live or non-live;
7024       this variable is initalized only after CheckPrereq has run
7025   @type cleanup: boolean
7026   @ivar cleanup: Wheater we cleanup from a failed migration
7027   @type iallocator: string
7028   @ivar iallocator: The iallocator used to determine target_node
7029   @type target_node: string
7030   @ivar target_node: If given, the target_node to reallocate the instance to
7031   @type failover: boolean
7032   @ivar failover: Whether operation results in failover or migration
7033   @type fallback: boolean
7034   @ivar fallback: Whether fallback to failover is allowed if migration not
7035                   possible
7036   @type ignore_consistency: boolean
7037   @ivar ignore_consistency: Wheter we should ignore consistency between source
7038                             and target node
7039   @type shutdown_timeout: int
7040   @ivar shutdown_timeout: In case of failover timeout of the shutdown
7041
7042   """
7043   def __init__(self, lu, instance_name, cleanup=False,
7044                failover=False, fallback=False,
7045                ignore_consistency=False,
7046                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7047     """Initializes this class.
7048
7049     """
7050     Tasklet.__init__(self, lu)
7051
7052     # Parameters
7053     self.instance_name = instance_name
7054     self.cleanup = cleanup
7055     self.live = False # will be overridden later
7056     self.failover = failover
7057     self.fallback = fallback
7058     self.ignore_consistency = ignore_consistency
7059     self.shutdown_timeout = shutdown_timeout
7060
7061   def CheckPrereq(self):
7062     """Check prerequisites.
7063
7064     This checks that the instance is in the cluster.
7065
7066     """
7067     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7068     instance = self.cfg.GetInstanceInfo(instance_name)
7069     assert instance is not None
7070     self.instance = instance
7071
7072     if (not self.cleanup and not instance.admin_up and not self.failover and
7073         self.fallback):
7074       self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7075                       " to failover")
7076       self.failover = True
7077
7078     if instance.disk_template not in constants.DTS_MIRRORED:
7079       if self.failover:
7080         text = "failovers"
7081       else:
7082         text = "migrations"
7083       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7084                                  " %s" % (instance.disk_template, text),
7085                                  errors.ECODE_STATE)
7086
7087     if instance.disk_template in constants.DTS_EXT_MIRROR:
7088       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7089
7090       if self.lu.op.iallocator:
7091         self._RunAllocator()
7092       else:
7093         # We set set self.target_node as it is required by
7094         # BuildHooksEnv
7095         self.target_node = self.lu.op.target_node
7096
7097       # self.target_node is already populated, either directly or by the
7098       # iallocator run
7099       target_node = self.target_node
7100       if self.target_node == instance.primary_node:
7101         raise errors.OpPrereqError("Cannot migrate instance %s"
7102                                    " to its primary (%s)" %
7103                                    (instance.name, instance.primary_node))
7104
7105       if len(self.lu.tasklets) == 1:
7106         # It is safe to release locks only when we're the only tasklet
7107         # in the LU
7108         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7109                       keep=[instance.primary_node, self.target_node])
7110
7111     else:
7112       secondary_nodes = instance.secondary_nodes
7113       if not secondary_nodes:
7114         raise errors.ConfigurationError("No secondary node but using"
7115                                         " %s disk template" %
7116                                         instance.disk_template)
7117       target_node = secondary_nodes[0]
7118       if self.lu.op.iallocator or (self.lu.op.target_node and
7119                                    self.lu.op.target_node != target_node):
7120         if self.failover:
7121           text = "failed over"
7122         else:
7123           text = "migrated"
7124         raise errors.OpPrereqError("Instances with disk template %s cannot"
7125                                    " be %s to arbitrary nodes"
7126                                    " (neither an iallocator nor a target"
7127                                    " node can be passed)" %
7128                                    (instance.disk_template, text),
7129                                    errors.ECODE_INVAL)
7130
7131     i_be = self.cfg.GetClusterInfo().FillBE(instance)
7132
7133     # check memory requirements on the secondary node
7134     if not self.failover or instance.admin_up:
7135       _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7136                            instance.name, i_be[constants.BE_MEMORY],
7137                            instance.hypervisor)
7138     else:
7139       self.lu.LogInfo("Not checking memory on the secondary node as"
7140                       " instance will not be started")
7141
7142     # check bridge existance
7143     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7144
7145     if not self.cleanup:
7146       _CheckNodeNotDrained(self.lu, target_node)
7147       if not self.failover:
7148         result = self.rpc.call_instance_migratable(instance.primary_node,
7149                                                    instance)
7150         if result.fail_msg and self.fallback:
7151           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7152                           " failover")
7153           self.failover = True
7154         else:
7155           result.Raise("Can't migrate, please use failover",
7156                        prereq=True, ecode=errors.ECODE_STATE)
7157
7158     assert not (self.failover and self.cleanup)
7159
7160     if not self.failover:
7161       if self.lu.op.live is not None and self.lu.op.mode is not None:
7162         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7163                                    " parameters are accepted",
7164                                    errors.ECODE_INVAL)
7165       if self.lu.op.live is not None:
7166         if self.lu.op.live:
7167           self.lu.op.mode = constants.HT_MIGRATION_LIVE
7168         else:
7169           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7170         # reset the 'live' parameter to None so that repeated
7171         # invocations of CheckPrereq do not raise an exception
7172         self.lu.op.live = None
7173       elif self.lu.op.mode is None:
7174         # read the default value from the hypervisor
7175         i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7176                                                 skip_globals=False)
7177         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7178
7179       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7180     else:
7181       # Failover is never live
7182       self.live = False
7183
7184   def _RunAllocator(self):
7185     """Run the allocator based on input opcode.
7186
7187     """
7188     ial = IAllocator(self.cfg, self.rpc,
7189                      mode=constants.IALLOCATOR_MODE_RELOC,
7190                      name=self.instance_name,
7191                      # TODO See why hail breaks with a single node below
7192                      relocate_from=[self.instance.primary_node,
7193                                     self.instance.primary_node],
7194                      )
7195
7196     ial.Run(self.lu.op.iallocator)
7197
7198     if not ial.success:
7199       raise errors.OpPrereqError("Can't compute nodes using"
7200                                  " iallocator '%s': %s" %
7201                                  (self.lu.op.iallocator, ial.info),
7202                                  errors.ECODE_NORES)
7203     if len(ial.result) != ial.required_nodes:
7204       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7205                                  " of nodes (%s), required %s" %
7206                                  (self.lu.op.iallocator, len(ial.result),
7207                                   ial.required_nodes), errors.ECODE_FAULT)
7208     self.target_node = ial.result[0]
7209     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7210                  self.instance_name, self.lu.op.iallocator,
7211                  utils.CommaJoin(ial.result))
7212
7213   def _WaitUntilSync(self):
7214     """Poll with custom rpc for disk sync.
7215
7216     This uses our own step-based rpc call.
7217
7218     """
7219     self.feedback_fn("* wait until resync is done")
7220     all_done = False
7221     while not all_done:
7222       all_done = True
7223       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7224                                             self.nodes_ip,
7225                                             self.instance.disks)
7226       min_percent = 100
7227       for node, nres in result.items():
7228         nres.Raise("Cannot resync disks on node %s" % node)
7229         node_done, node_percent = nres.payload
7230         all_done = all_done and node_done
7231         if node_percent is not None:
7232           min_percent = min(min_percent, node_percent)
7233       if not all_done:
7234         if min_percent < 100:
7235           self.feedback_fn("   - progress: %.1f%%" % min_percent)
7236         time.sleep(2)
7237
7238   def _EnsureSecondary(self, node):
7239     """Demote a node to secondary.
7240
7241     """
7242     self.feedback_fn("* switching node %s to secondary mode" % node)
7243
7244     for dev in self.instance.disks:
7245       self.cfg.SetDiskID(dev, node)
7246
7247     result = self.rpc.call_blockdev_close(node, self.instance.name,
7248                                           self.instance.disks)
7249     result.Raise("Cannot change disk to secondary on node %s" % node)
7250
7251   def _GoStandalone(self):
7252     """Disconnect from the network.
7253
7254     """
7255     self.feedback_fn("* changing into standalone mode")
7256     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7257                                                self.instance.disks)
7258     for node, nres in result.items():
7259       nres.Raise("Cannot disconnect disks node %s" % node)
7260
7261   def _GoReconnect(self, multimaster):
7262     """Reconnect to the network.
7263
7264     """
7265     if multimaster:
7266       msg = "dual-master"
7267     else:
7268       msg = "single-master"
7269     self.feedback_fn("* changing disks into %s mode" % msg)
7270     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7271                                            self.instance.disks,
7272                                            self.instance.name, multimaster)
7273     for node, nres in result.items():
7274       nres.Raise("Cannot change disks config on node %s" % node)
7275
7276   def _ExecCleanup(self):
7277     """Try to cleanup after a failed migration.
7278
7279     The cleanup is done by:
7280       - check that the instance is running only on one node
7281         (and update the config if needed)
7282       - change disks on its secondary node to secondary
7283       - wait until disks are fully synchronized
7284       - disconnect from the network
7285       - change disks into single-master mode
7286       - wait again until disks are fully synchronized
7287
7288     """
7289     instance = self.instance
7290     target_node = self.target_node
7291     source_node = self.source_node
7292
7293     # check running on only one node
7294     self.feedback_fn("* checking where the instance actually runs"
7295                      " (if this hangs, the hypervisor might be in"
7296                      " a bad state)")
7297     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7298     for node, result in ins_l.items():
7299       result.Raise("Can't contact node %s" % node)
7300
7301     runningon_source = instance.name in ins_l[source_node].payload
7302     runningon_target = instance.name in ins_l[target_node].payload
7303
7304     if runningon_source and runningon_target:
7305       raise errors.OpExecError("Instance seems to be running on two nodes,"
7306                                " or the hypervisor is confused; you will have"
7307                                " to ensure manually that it runs only on one"
7308                                " and restart this operation")
7309
7310     if not (runningon_source or runningon_target):
7311       raise errors.OpExecError("Instance does not seem to be running at all;"
7312                                " in this case it's safer to repair by"
7313                                " running 'gnt-instance stop' to ensure disk"
7314                                " shutdown, and then restarting it")
7315
7316     if runningon_target:
7317       # the migration has actually succeeded, we need to update the config
7318       self.feedback_fn("* instance running on secondary node (%s),"
7319                        " updating config" % target_node)
7320       instance.primary_node = target_node
7321       self.cfg.Update(instance, self.feedback_fn)
7322       demoted_node = source_node
7323     else:
7324       self.feedback_fn("* instance confirmed to be running on its"
7325                        " primary node (%s)" % source_node)
7326       demoted_node = target_node
7327
7328     if instance.disk_template in constants.DTS_INT_MIRROR:
7329       self._EnsureSecondary(demoted_node)
7330       try:
7331         self._WaitUntilSync()
7332       except errors.OpExecError:
7333         # we ignore here errors, since if the device is standalone, it
7334         # won't be able to sync
7335         pass
7336       self._GoStandalone()
7337       self._GoReconnect(False)
7338       self._WaitUntilSync()
7339
7340     self.feedback_fn("* done")
7341
7342   def _RevertDiskStatus(self):
7343     """Try to revert the disk status after a failed migration.
7344
7345     """
7346     target_node = self.target_node
7347     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7348       return
7349
7350     try:
7351       self._EnsureSecondary(target_node)
7352       self._GoStandalone()
7353       self._GoReconnect(False)
7354       self._WaitUntilSync()
7355     except errors.OpExecError, err:
7356       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7357                          " please try to recover the instance manually;"
7358                          " error '%s'" % str(err))
7359
7360   def _AbortMigration(self):
7361     """Call the hypervisor code to abort a started migration.
7362
7363     """
7364     instance = self.instance
7365     target_node = self.target_node
7366     migration_info = self.migration_info
7367
7368     abort_result = self.rpc.call_finalize_migration(target_node,
7369                                                     instance,
7370                                                     migration_info,
7371                                                     False)
7372     abort_msg = abort_result.fail_msg
7373     if abort_msg:
7374       logging.error("Aborting migration failed on target node %s: %s",
7375                     target_node, abort_msg)
7376       # Don't raise an exception here, as we stil have to try to revert the
7377       # disk status, even if this step failed.
7378
7379   def _ExecMigration(self):
7380     """Migrate an instance.
7381
7382     The migrate is done by:
7383       - change the disks into dual-master mode
7384       - wait until disks are fully synchronized again
7385       - migrate the instance
7386       - change disks on the new secondary node (the old primary) to secondary
7387       - wait until disks are fully synchronized
7388       - change disks into single-master mode
7389
7390     """
7391     instance = self.instance
7392     target_node = self.target_node
7393     source_node = self.source_node
7394
7395     self.feedback_fn("* checking disk consistency between source and target")
7396     for dev in instance.disks:
7397       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7398         raise errors.OpExecError("Disk %s is degraded or not fully"
7399                                  " synchronized on target node,"
7400                                  " aborting migration" % dev.iv_name)
7401
7402     # First get the migration information from the remote node
7403     result = self.rpc.call_migration_info(source_node, instance)
7404     msg = result.fail_msg
7405     if msg:
7406       log_err = ("Failed fetching source migration information from %s: %s" %
7407                  (source_node, msg))
7408       logging.error(log_err)
7409       raise errors.OpExecError(log_err)
7410
7411     self.migration_info = migration_info = result.payload
7412
7413     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7414       # Then switch the disks to master/master mode
7415       self._EnsureSecondary(target_node)
7416       self._GoStandalone()
7417       self._GoReconnect(True)
7418       self._WaitUntilSync()
7419
7420     self.feedback_fn("* preparing %s to accept the instance" % target_node)
7421     result = self.rpc.call_accept_instance(target_node,
7422                                            instance,
7423                                            migration_info,
7424                                            self.nodes_ip[target_node])
7425
7426     msg = result.fail_msg
7427     if msg:
7428       logging.error("Instance pre-migration failed, trying to revert"
7429                     " disk status: %s", msg)
7430       self.feedback_fn("Pre-migration failed, aborting")
7431       self._AbortMigration()
7432       self._RevertDiskStatus()
7433       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7434                                (instance.name, msg))
7435
7436     self.feedback_fn("* migrating instance to %s" % target_node)
7437     result = self.rpc.call_instance_migrate(source_node, instance,
7438                                             self.nodes_ip[target_node],
7439                                             self.live)
7440     msg = result.fail_msg
7441     if msg:
7442       logging.error("Instance migration failed, trying to revert"
7443                     " disk status: %s", msg)
7444       self.feedback_fn("Migration failed, aborting")
7445       self._AbortMigration()
7446       self._RevertDiskStatus()
7447       raise errors.OpExecError("Could not migrate instance %s: %s" %
7448                                (instance.name, msg))
7449
7450     instance.primary_node = target_node
7451     # distribute new instance config to the other nodes
7452     self.cfg.Update(instance, self.feedback_fn)
7453
7454     result = self.rpc.call_finalize_migration(target_node,
7455                                               instance,
7456                                               migration_info,
7457                                               True)
7458     msg = result.fail_msg
7459     if msg:
7460       logging.error("Instance migration succeeded, but finalization failed:"
7461                     " %s", msg)
7462       raise errors.OpExecError("Could not finalize instance migration: %s" %
7463                                msg)
7464
7465     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7466       self._EnsureSecondary(source_node)
7467       self._WaitUntilSync()
7468       self._GoStandalone()
7469       self._GoReconnect(False)
7470       self._WaitUntilSync()
7471
7472     self.feedback_fn("* done")
7473
7474   def _ExecFailover(self):
7475     """Failover an instance.
7476
7477     The failover is done by shutting it down on its present node and
7478     starting it on the secondary.
7479
7480     """
7481     instance = self.instance
7482     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7483
7484     source_node = instance.primary_node
7485     target_node = self.target_node
7486
7487     if instance.admin_up:
7488       self.feedback_fn("* checking disk consistency between source and target")
7489       for dev in instance.disks:
7490         # for drbd, these are drbd over lvm
7491         if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7492           if primary_node.offline:
7493             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7494                              " target node %s" %
7495                              (primary_node.name, dev.iv_name, target_node))
7496           elif not self.ignore_consistency:
7497             raise errors.OpExecError("Disk %s is degraded on target node,"
7498                                      " aborting failover" % dev.iv_name)
7499     else:
7500       self.feedback_fn("* not checking disk consistency as instance is not"
7501                        " running")
7502
7503     self.feedback_fn("* shutting down instance on source node")
7504     logging.info("Shutting down instance %s on node %s",
7505                  instance.name, source_node)
7506
7507     result = self.rpc.call_instance_shutdown(source_node, instance,
7508                                              self.shutdown_timeout)
7509     msg = result.fail_msg
7510     if msg:
7511       if self.ignore_consistency or primary_node.offline:
7512         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7513                            " proceeding anyway; please make sure node"
7514                            " %s is down; error details: %s",
7515                            instance.name, source_node, source_node, msg)
7516       else:
7517         raise errors.OpExecError("Could not shutdown instance %s on"
7518                                  " node %s: %s" %
7519                                  (instance.name, source_node, msg))
7520
7521     self.feedback_fn("* deactivating the instance's disks on source node")
7522     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7523       raise errors.OpExecError("Can't shut down the instance's disks")
7524
7525     instance.primary_node = target_node
7526     # distribute new instance config to the other nodes
7527     self.cfg.Update(instance, self.feedback_fn)
7528
7529     # Only start the instance if it's marked as up
7530     if instance.admin_up:
7531       self.feedback_fn("* activating the instance's disks on target node %s" %
7532                        target_node)
7533       logging.info("Starting instance %s on node %s",
7534                    instance.name, target_node)
7535
7536       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7537                                            ignore_secondaries=True)
7538       if not disks_ok:
7539         _ShutdownInstanceDisks(self.lu, instance)
7540         raise errors.OpExecError("Can't activate the instance's disks")
7541
7542       self.feedback_fn("* starting the instance on the target node %s" %
7543                        target_node)
7544       result = self.rpc.call_instance_start(target_node, instance, None, None,
7545                                             False)
7546       msg = result.fail_msg
7547       if msg:
7548         _ShutdownInstanceDisks(self.lu, instance)
7549         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7550                                  (instance.name, target_node, msg))
7551
7552   def Exec(self, feedback_fn):
7553     """Perform the migration.
7554
7555     """
7556     self.feedback_fn = feedback_fn
7557     self.source_node = self.instance.primary_node
7558
7559     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7560     if self.instance.disk_template in constants.DTS_INT_MIRROR:
7561       self.target_node = self.instance.secondary_nodes[0]
7562       # Otherwise self.target_node has been populated either
7563       # directly, or through an iallocator.
7564
7565     self.all_nodes = [self.source_node, self.target_node]
7566     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7567                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
7568
7569     if self.failover:
7570       feedback_fn("Failover instance %s" % self.instance.name)
7571       self._ExecFailover()
7572     else:
7573       feedback_fn("Migrating instance %s" % self.instance.name)
7574
7575       if self.cleanup:
7576         return self._ExecCleanup()
7577       else:
7578         return self._ExecMigration()
7579
7580
7581 def _CreateBlockDev(lu, node, instance, device, force_create,
7582                     info, force_open):
7583   """Create a tree of block devices on a given node.
7584
7585   If this device type has to be created on secondaries, create it and
7586   all its children.
7587
7588   If not, just recurse to children keeping the same 'force' value.
7589
7590   @param lu: the lu on whose behalf we execute
7591   @param node: the node on which to create the device
7592   @type instance: L{objects.Instance}
7593   @param instance: the instance which owns the device
7594   @type device: L{objects.Disk}
7595   @param device: the device to create
7596   @type force_create: boolean
7597   @param force_create: whether to force creation of this device; this
7598       will be change to True whenever we find a device which has
7599       CreateOnSecondary() attribute
7600   @param info: the extra 'metadata' we should attach to the device
7601       (this will be represented as a LVM tag)
7602   @type force_open: boolean
7603   @param force_open: this parameter will be passes to the
7604       L{backend.BlockdevCreate} function where it specifies
7605       whether we run on primary or not, and it affects both
7606       the child assembly and the device own Open() execution
7607
7608   """
7609   if device.CreateOnSecondary():
7610     force_create = True
7611
7612   if device.children:
7613     for child in device.children:
7614       _CreateBlockDev(lu, node, instance, child, force_create,
7615                       info, force_open)
7616
7617   if not force_create:
7618     return
7619
7620   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7621
7622
7623 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7624   """Create a single block device on a given node.
7625
7626   This will not recurse over children of the device, so they must be
7627   created in advance.
7628
7629   @param lu: the lu on whose behalf we execute
7630   @param node: the node on which to create the device
7631   @type instance: L{objects.Instance}
7632   @param instance: the instance which owns the device
7633   @type device: L{objects.Disk}
7634   @param device: the device to create
7635   @param info: the extra 'metadata' we should attach to the device
7636       (this will be represented as a LVM tag)
7637   @type force_open: boolean
7638   @param force_open: this parameter will be passes to the
7639       L{backend.BlockdevCreate} function where it specifies
7640       whether we run on primary or not, and it affects both
7641       the child assembly and the device own Open() execution
7642
7643   """
7644   lu.cfg.SetDiskID(device, node)
7645   result = lu.rpc.call_blockdev_create(node, device, device.size,
7646                                        instance.name, force_open, info)
7647   result.Raise("Can't create block device %s on"
7648                " node %s for instance %s" % (device, node, instance.name))
7649   if device.physical_id is None:
7650     device.physical_id = result.payload
7651
7652
7653 def _GenerateUniqueNames(lu, exts):
7654   """Generate a suitable LV name.
7655
7656   This will generate a logical volume name for the given instance.
7657
7658   """
7659   results = []
7660   for val in exts:
7661     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7662     results.append("%s%s" % (new_id, val))
7663   return results
7664
7665
7666 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7667                          iv_name, p_minor, s_minor):
7668   """Generate a drbd8 device complete with its children.
7669
7670   """
7671   assert len(vgnames) == len(names) == 2
7672   port = lu.cfg.AllocatePort()
7673   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7674   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7675                           logical_id=(vgnames[0], names[0]))
7676   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7677                           logical_id=(vgnames[1], names[1]))
7678   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7679                           logical_id=(primary, secondary, port,
7680                                       p_minor, s_minor,
7681                                       shared_secret),
7682                           children=[dev_data, dev_meta],
7683                           iv_name=iv_name)
7684   return drbd_dev
7685
7686
7687 def _GenerateDiskTemplate(lu, template_name,
7688                           instance_name, primary_node,
7689                           secondary_nodes, disk_info,
7690                           file_storage_dir, file_driver,
7691                           base_index, feedback_fn):
7692   """Generate the entire disk layout for a given template type.
7693
7694   """
7695   #TODO: compute space requirements
7696
7697   vgname = lu.cfg.GetVGName()
7698   disk_count = len(disk_info)
7699   disks = []
7700   if template_name == constants.DT_DISKLESS:
7701     pass
7702   elif template_name == constants.DT_PLAIN:
7703     if len(secondary_nodes) != 0:
7704       raise errors.ProgrammerError("Wrong template configuration")
7705
7706     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7707                                       for i in range(disk_count)])
7708     for idx, disk in enumerate(disk_info):
7709       disk_index = idx + base_index
7710       vg = disk.get(constants.IDISK_VG, vgname)
7711       feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7712       disk_dev = objects.Disk(dev_type=constants.LD_LV,
7713                               size=disk[constants.IDISK_SIZE],
7714                               logical_id=(vg, names[idx]),
7715                               iv_name="disk/%d" % disk_index,
7716                               mode=disk[constants.IDISK_MODE])
7717       disks.append(disk_dev)
7718   elif template_name == constants.DT_DRBD8:
7719     if len(secondary_nodes) != 1:
7720       raise errors.ProgrammerError("Wrong template configuration")
7721     remote_node = secondary_nodes[0]
7722     minors = lu.cfg.AllocateDRBDMinor(
7723       [primary_node, remote_node] * len(disk_info), instance_name)
7724
7725     names = []
7726     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7727                                                for i in range(disk_count)]):
7728       names.append(lv_prefix + "_data")
7729       names.append(lv_prefix + "_meta")
7730     for idx, disk in enumerate(disk_info):
7731       disk_index = idx + base_index
7732       data_vg = disk.get(constants.IDISK_VG, vgname)
7733       meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7734       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7735                                       disk[constants.IDISK_SIZE],
7736                                       [data_vg, meta_vg],
7737                                       names[idx * 2:idx * 2 + 2],
7738                                       "disk/%d" % disk_index,
7739                                       minors[idx * 2], minors[idx * 2 + 1])
7740       disk_dev.mode = disk[constants.IDISK_MODE]
7741       disks.append(disk_dev)
7742   elif template_name == constants.DT_FILE:
7743     if len(secondary_nodes) != 0:
7744       raise errors.ProgrammerError("Wrong template configuration")
7745
7746     opcodes.RequireFileStorage()
7747
7748     for idx, disk in enumerate(disk_info):
7749       disk_index = idx + base_index
7750       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7751                               size=disk[constants.IDISK_SIZE],
7752                               iv_name="disk/%d" % disk_index,
7753                               logical_id=(file_driver,
7754                                           "%s/disk%d" % (file_storage_dir,
7755                                                          disk_index)),
7756                               mode=disk[constants.IDISK_MODE])
7757       disks.append(disk_dev)
7758   elif template_name == constants.DT_SHARED_FILE:
7759     if len(secondary_nodes) != 0:
7760       raise errors.ProgrammerError("Wrong template configuration")
7761
7762     opcodes.RequireSharedFileStorage()
7763
7764     for idx, disk in enumerate(disk_info):
7765       disk_index = idx + base_index
7766       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7767                               size=disk[constants.IDISK_SIZE],
7768                               iv_name="disk/%d" % disk_index,
7769                               logical_id=(file_driver,
7770                                           "%s/disk%d" % (file_storage_dir,
7771                                                          disk_index)),
7772                               mode=disk[constants.IDISK_MODE])
7773       disks.append(disk_dev)
7774   elif template_name == constants.DT_BLOCK:
7775     if len(secondary_nodes) != 0:
7776       raise errors.ProgrammerError("Wrong template configuration")
7777
7778     for idx, disk in enumerate(disk_info):
7779       disk_index = idx + base_index
7780       disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7781                               size=disk[constants.IDISK_SIZE],
7782                               logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7783                                           disk[constants.IDISK_ADOPT]),
7784                               iv_name="disk/%d" % disk_index,
7785                               mode=disk[constants.IDISK_MODE])
7786       disks.append(disk_dev)
7787
7788   else:
7789     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7790   return disks
7791
7792
7793 def _GetInstanceInfoText(instance):
7794   """Compute that text that should be added to the disk's metadata.
7795
7796   """
7797   return "originstname+%s" % instance.name
7798
7799
7800 def _CalcEta(time_taken, written, total_size):
7801   """Calculates the ETA based on size written and total size.
7802
7803   @param time_taken: The time taken so far
7804   @param written: amount written so far
7805   @param total_size: The total size of data to be written
7806   @return: The remaining time in seconds
7807
7808   """
7809   avg_time = time_taken / float(written)
7810   return (total_size - written) * avg_time
7811
7812
7813 def _WipeDisks(lu, instance):
7814   """Wipes instance disks.
7815
7816   @type lu: L{LogicalUnit}
7817   @param lu: the logical unit on whose behalf we execute
7818   @type instance: L{objects.Instance}
7819   @param instance: the instance whose disks we should create
7820   @return: the success of the wipe
7821
7822   """
7823   node = instance.primary_node
7824
7825   for device in instance.disks:
7826     lu.cfg.SetDiskID(device, node)
7827
7828   logging.info("Pause sync of instance %s disks", instance.name)
7829   result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7830
7831   for idx, success in enumerate(result.payload):
7832     if not success:
7833       logging.warn("pause-sync of instance %s for disks %d failed",
7834                    instance.name, idx)
7835
7836   try:
7837     for idx, device in enumerate(instance.disks):
7838       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7839       # MAX_WIPE_CHUNK at max
7840       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7841                             constants.MIN_WIPE_CHUNK_PERCENT)
7842       # we _must_ make this an int, otherwise rounding errors will
7843       # occur
7844       wipe_chunk_size = int(wipe_chunk_size)
7845
7846       lu.LogInfo("* Wiping disk %d", idx)
7847       logging.info("Wiping disk %d for instance %s, node %s using"
7848                    " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7849
7850       offset = 0
7851       size = device.size
7852       last_output = 0
7853       start_time = time.time()
7854
7855       while offset < size:
7856         wipe_size = min(wipe_chunk_size, size - offset)
7857         logging.debug("Wiping disk %d, offset %s, chunk %s",
7858                       idx, offset, wipe_size)
7859         result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7860         result.Raise("Could not wipe disk %d at offset %d for size %d" %
7861                      (idx, offset, wipe_size))
7862         now = time.time()
7863         offset += wipe_size
7864         if now - last_output >= 60:
7865           eta = _CalcEta(now - start_time, offset, size)
7866           lu.LogInfo(" - done: %.1f%% ETA: %s" %
7867                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
7868           last_output = now
7869   finally:
7870     logging.info("Resume sync of instance %s disks", instance.name)
7871
7872     result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7873
7874     for idx, success in enumerate(result.payload):
7875       if not success:
7876         lu.LogWarning("Resume sync of disk %d failed, please have a"
7877                       " look at the status and troubleshoot the issue", idx)
7878         logging.warn("resume-sync of instance %s for disks %d failed",
7879                      instance.name, idx)
7880
7881
7882 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7883   """Create all disks for an instance.
7884
7885   This abstracts away some work from AddInstance.
7886
7887   @type lu: L{LogicalUnit}
7888   @param lu: the logical unit on whose behalf we execute
7889   @type instance: L{objects.Instance}
7890   @param instance: the instance whose disks we should create
7891   @type to_skip: list
7892   @param to_skip: list of indices to skip
7893   @type target_node: string
7894   @param target_node: if passed, overrides the target node for creation
7895   @rtype: boolean
7896   @return: the success of the creation
7897
7898   """
7899   info = _GetInstanceInfoText(instance)
7900   if target_node is None:
7901     pnode = instance.primary_node
7902     all_nodes = instance.all_nodes
7903   else:
7904     pnode = target_node
7905     all_nodes = [pnode]
7906
7907   if instance.disk_template in constants.DTS_FILEBASED:
7908     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7909     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7910
7911     result.Raise("Failed to create directory '%s' on"
7912                  " node %s" % (file_storage_dir, pnode))
7913
7914   # Note: this needs to be kept in sync with adding of disks in
7915   # LUInstanceSetParams
7916   for idx, device in enumerate(instance.disks):
7917     if to_skip and idx in to_skip:
7918       continue
7919     logging.info("Creating volume %s for instance %s",
7920                  device.iv_name, instance.name)
7921     #HARDCODE
7922     for node in all_nodes:
7923       f_create = node == pnode
7924       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7925
7926
7927 def _RemoveDisks(lu, instance, target_node=None):
7928   """Remove all disks for an instance.
7929
7930   This abstracts away some work from `AddInstance()` and
7931   `RemoveInstance()`. Note that in case some of the devices couldn't
7932   be removed, the removal will continue with the other ones (compare
7933   with `_CreateDisks()`).
7934
7935   @type lu: L{LogicalUnit}
7936   @param lu: the logical unit on whose behalf we execute
7937   @type instance: L{objects.Instance}
7938   @param instance: the instance whose disks we should remove
7939   @type target_node: string
7940   @param target_node: used to override the node on which to remove the disks
7941   @rtype: boolean
7942   @return: the success of the removal
7943
7944   """
7945   logging.info("Removing block devices for instance %s", instance.name)
7946
7947   all_result = True
7948   for device in instance.disks:
7949     if target_node:
7950       edata = [(target_node, device)]
7951     else:
7952       edata = device.ComputeNodeTree(instance.primary_node)
7953     for node, disk in edata:
7954       lu.cfg.SetDiskID(disk, node)
7955       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7956       if msg:
7957         lu.LogWarning("Could not remove block device %s on node %s,"
7958                       " continuing anyway: %s", device.iv_name, node, msg)
7959         all_result = False
7960
7961   if instance.disk_template == constants.DT_FILE:
7962     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7963     if target_node:
7964       tgt = target_node
7965     else:
7966       tgt = instance.primary_node
7967     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7968     if result.fail_msg:
7969       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7970                     file_storage_dir, instance.primary_node, result.fail_msg)
7971       all_result = False
7972
7973   return all_result
7974
7975
7976 def _ComputeDiskSizePerVG(disk_template, disks):
7977   """Compute disk size requirements in the volume group
7978
7979   """
7980   def _compute(disks, payload):
7981     """Universal algorithm.
7982
7983     """
7984     vgs = {}
7985     for disk in disks:
7986       vgs[disk[constants.IDISK_VG]] = \
7987         vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
7988
7989     return vgs
7990
7991   # Required free disk space as a function of disk and swap space
7992   req_size_dict = {
7993     constants.DT_DISKLESS: {},
7994     constants.DT_PLAIN: _compute(disks, 0),
7995     # 128 MB are added for drbd metadata for each disk
7996     constants.DT_DRBD8: _compute(disks, 128),
7997     constants.DT_FILE: {},
7998     constants.DT_SHARED_FILE: {},
7999   }
8000
8001   if disk_template not in req_size_dict:
8002     raise errors.ProgrammerError("Disk template '%s' size requirement"
8003                                  " is unknown" % disk_template)
8004
8005   return req_size_dict[disk_template]
8006
8007
8008 def _ComputeDiskSize(disk_template, disks):
8009   """Compute disk size requirements in the volume group
8010
8011   """
8012   # Required free disk space as a function of disk and swap space
8013   req_size_dict = {
8014     constants.DT_DISKLESS: None,
8015     constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8016     # 128 MB are added for drbd metadata for each disk
8017     constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
8018     constants.DT_FILE: None,
8019     constants.DT_SHARED_FILE: 0,
8020     constants.DT_BLOCK: 0,
8021   }
8022
8023   if disk_template not in req_size_dict:
8024     raise errors.ProgrammerError("Disk template '%s' size requirement"
8025                                  " is unknown" % disk_template)
8026
8027   return req_size_dict[disk_template]
8028
8029
8030 def _FilterVmNodes(lu, nodenames):
8031   """Filters out non-vm_capable nodes from a list.
8032
8033   @type lu: L{LogicalUnit}
8034   @param lu: the logical unit for which we check
8035   @type nodenames: list
8036   @param nodenames: the list of nodes on which we should check
8037   @rtype: list
8038   @return: the list of vm-capable nodes
8039
8040   """
8041   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8042   return [name for name in nodenames if name not in vm_nodes]
8043
8044
8045 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8046   """Hypervisor parameter validation.
8047
8048   This function abstract the hypervisor parameter validation to be
8049   used in both instance create and instance modify.
8050
8051   @type lu: L{LogicalUnit}
8052   @param lu: the logical unit for which we check
8053   @type nodenames: list
8054   @param nodenames: the list of nodes on which we should check
8055   @type hvname: string
8056   @param hvname: the name of the hypervisor we should use
8057   @type hvparams: dict
8058   @param hvparams: the parameters which we need to check
8059   @raise errors.OpPrereqError: if the parameters are not valid
8060
8061   """
8062   nodenames = _FilterVmNodes(lu, nodenames)
8063   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8064                                                   hvname,
8065                                                   hvparams)
8066   for node in nodenames:
8067     info = hvinfo[node]
8068     if info.offline:
8069       continue
8070     info.Raise("Hypervisor parameter validation failed on node %s" % node)
8071
8072
8073 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8074   """OS parameters validation.
8075
8076   @type lu: L{LogicalUnit}
8077   @param lu: the logical unit for which we check
8078   @type required: boolean
8079   @param required: whether the validation should fail if the OS is not
8080       found
8081   @type nodenames: list
8082   @param nodenames: the list of nodes on which we should check
8083   @type osname: string
8084   @param osname: the name of the hypervisor we should use
8085   @type osparams: dict
8086   @param osparams: the parameters which we need to check
8087   @raise errors.OpPrereqError: if the parameters are not valid
8088
8089   """
8090   nodenames = _FilterVmNodes(lu, nodenames)
8091   result = lu.rpc.call_os_validate(required, nodenames, osname,
8092                                    [constants.OS_VALIDATE_PARAMETERS],
8093                                    osparams)
8094   for node, nres in result.items():
8095     # we don't check for offline cases since this should be run only
8096     # against the master node and/or an instance's nodes
8097     nres.Raise("OS Parameters validation failed on node %s" % node)
8098     if not nres.payload:
8099       lu.LogInfo("OS %s not found on node %s, validation skipped",
8100                  osname, node)
8101
8102
8103 class LUInstanceCreate(LogicalUnit):
8104   """Create an instance.
8105
8106   """
8107   HPATH = "instance-add"
8108   HTYPE = constants.HTYPE_INSTANCE
8109   REQ_BGL = False
8110
8111   def CheckArguments(self):
8112     """Check arguments.
8113
8114     """
8115     # do not require name_check to ease forward/backward compatibility
8116     # for tools
8117     if self.op.no_install and self.op.start:
8118       self.LogInfo("No-installation mode selected, disabling startup")
8119       self.op.start = False
8120     # validate/normalize the instance name
8121     self.op.instance_name = \
8122       netutils.Hostname.GetNormalizedName(self.op.instance_name)
8123
8124     if self.op.ip_check and not self.op.name_check:
8125       # TODO: make the ip check more flexible and not depend on the name check
8126       raise errors.OpPrereqError("Cannot do IP address check without a name"
8127                                  " check", errors.ECODE_INVAL)
8128
8129     # check nics' parameter names
8130     for nic in self.op.nics:
8131       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8132
8133     # check disks. parameter names and consistent adopt/no-adopt strategy
8134     has_adopt = has_no_adopt = False
8135     for disk in self.op.disks:
8136       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8137       if constants.IDISK_ADOPT in disk:
8138         has_adopt = True
8139       else:
8140         has_no_adopt = True
8141     if has_adopt and has_no_adopt:
8142       raise errors.OpPrereqError("Either all disks are adopted or none is",
8143                                  errors.ECODE_INVAL)
8144     if has_adopt:
8145       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8146         raise errors.OpPrereqError("Disk adoption is not supported for the"
8147                                    " '%s' disk template" %
8148                                    self.op.disk_template,
8149                                    errors.ECODE_INVAL)
8150       if self.op.iallocator is not None:
8151         raise errors.OpPrereqError("Disk adoption not allowed with an"
8152                                    " iallocator script", errors.ECODE_INVAL)
8153       if self.op.mode == constants.INSTANCE_IMPORT:
8154         raise errors.OpPrereqError("Disk adoption not allowed for"
8155                                    " instance import", errors.ECODE_INVAL)
8156     else:
8157       if self.op.disk_template in constants.DTS_MUST_ADOPT:
8158         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8159                                    " but no 'adopt' parameter given" %
8160                                    self.op.disk_template,
8161                                    errors.ECODE_INVAL)
8162
8163     self.adopt_disks = has_adopt
8164
8165     # instance name verification
8166     if self.op.name_check:
8167       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8168       self.op.instance_name = self.hostname1.name
8169       # used in CheckPrereq for ip ping check
8170       self.check_ip = self.hostname1.ip
8171     else:
8172       self.check_ip = None
8173
8174     # file storage checks
8175     if (self.op.file_driver and
8176         not self.op.file_driver in constants.FILE_DRIVER):
8177       raise errors.OpPrereqError("Invalid file driver name '%s'" %
8178                                  self.op.file_driver, errors.ECODE_INVAL)
8179
8180     if self.op.disk_template == constants.DT_FILE:
8181       opcodes.RequireFileStorage()
8182     elif self.op.disk_template == constants.DT_SHARED_FILE:
8183       opcodes.RequireSharedFileStorage()
8184
8185     ### Node/iallocator related checks
8186     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8187
8188     if self.op.pnode is not None:
8189       if self.op.disk_template in constants.DTS_INT_MIRROR:
8190         if self.op.snode is None:
8191           raise errors.OpPrereqError("The networked disk templates need"
8192                                      " a mirror node", errors.ECODE_INVAL)
8193       elif self.op.snode:
8194         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8195                         " template")
8196         self.op.snode = None
8197
8198     self._cds = _GetClusterDomainSecret()
8199
8200     if self.op.mode == constants.INSTANCE_IMPORT:
8201       # On import force_variant must be True, because if we forced it at
8202       # initial install, our only chance when importing it back is that it
8203       # works again!
8204       self.op.force_variant = True
8205
8206       if self.op.no_install:
8207         self.LogInfo("No-installation mode has no effect during import")
8208
8209     elif self.op.mode == constants.INSTANCE_CREATE:
8210       if self.op.os_type is None:
8211         raise errors.OpPrereqError("No guest OS specified",
8212                                    errors.ECODE_INVAL)
8213       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8214         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8215                                    " installation" % self.op.os_type,
8216                                    errors.ECODE_STATE)
8217       if self.op.disk_template is None:
8218         raise errors.OpPrereqError("No disk template specified",
8219                                    errors.ECODE_INVAL)
8220
8221     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8222       # Check handshake to ensure both clusters have the same domain secret
8223       src_handshake = self.op.source_handshake
8224       if not src_handshake:
8225         raise errors.OpPrereqError("Missing source handshake",
8226                                    errors.ECODE_INVAL)
8227
8228       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8229                                                            src_handshake)
8230       if errmsg:
8231         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8232                                    errors.ECODE_INVAL)
8233
8234       # Load and check source CA
8235       self.source_x509_ca_pem = self.op.source_x509_ca
8236       if not self.source_x509_ca_pem:
8237         raise errors.OpPrereqError("Missing source X509 CA",
8238                                    errors.ECODE_INVAL)
8239
8240       try:
8241         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8242                                                     self._cds)
8243       except OpenSSL.crypto.Error, err:
8244         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8245                                    (err, ), errors.ECODE_INVAL)
8246
8247       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8248       if errcode is not None:
8249         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8250                                    errors.ECODE_INVAL)
8251
8252       self.source_x509_ca = cert
8253
8254       src_instance_name = self.op.source_instance_name
8255       if not src_instance_name:
8256         raise errors.OpPrereqError("Missing source instance name",
8257                                    errors.ECODE_INVAL)
8258
8259       self.source_instance_name = \
8260           netutils.GetHostname(name=src_instance_name).name
8261
8262     else:
8263       raise errors.OpPrereqError("Invalid instance creation mode %r" %
8264                                  self.op.mode, errors.ECODE_INVAL)
8265
8266   def ExpandNames(self):
8267     """ExpandNames for CreateInstance.
8268
8269     Figure out the right locks for instance creation.
8270
8271     """
8272     self.needed_locks = {}
8273
8274     instance_name = self.op.instance_name
8275     # this is just a preventive check, but someone might still add this
8276     # instance in the meantime, and creation will fail at lock-add time
8277     if instance_name in self.cfg.GetInstanceList():
8278       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8279                                  instance_name, errors.ECODE_EXISTS)
8280
8281     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8282
8283     if self.op.iallocator:
8284       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8285     else:
8286       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8287       nodelist = [self.op.pnode]
8288       if self.op.snode is not None:
8289         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8290         nodelist.append(self.op.snode)
8291       self.needed_locks[locking.LEVEL_NODE] = nodelist
8292
8293     # in case of import lock the source node too
8294     if self.op.mode == constants.INSTANCE_IMPORT:
8295       src_node = self.op.src_node
8296       src_path = self.op.src_path
8297
8298       if src_path is None:
8299         self.op.src_path = src_path = self.op.instance_name
8300
8301       if src_node is None:
8302         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8303         self.op.src_node = None
8304         if os.path.isabs(src_path):
8305           raise errors.OpPrereqError("Importing an instance from a path"
8306                                      " requires a source node option",
8307                                      errors.ECODE_INVAL)
8308       else:
8309         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8310         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8311           self.needed_locks[locking.LEVEL_NODE].append(src_node)
8312         if not os.path.isabs(src_path):
8313           self.op.src_path = src_path = \
8314             utils.PathJoin(constants.EXPORT_DIR, src_path)
8315
8316   def _RunAllocator(self):
8317     """Run the allocator based on input opcode.
8318
8319     """
8320     nics = [n.ToDict() for n in self.nics]
8321     ial = IAllocator(self.cfg, self.rpc,
8322                      mode=constants.IALLOCATOR_MODE_ALLOC,
8323                      name=self.op.instance_name,
8324                      disk_template=self.op.disk_template,
8325                      tags=self.op.tags,
8326                      os=self.op.os_type,
8327                      vcpus=self.be_full[constants.BE_VCPUS],
8328                      memory=self.be_full[constants.BE_MEMORY],
8329                      disks=self.disks,
8330                      nics=nics,
8331                      hypervisor=self.op.hypervisor,
8332                      )
8333
8334     ial.Run(self.op.iallocator)
8335
8336     if not ial.success:
8337       raise errors.OpPrereqError("Can't compute nodes using"
8338                                  " iallocator '%s': %s" %
8339                                  (self.op.iallocator, ial.info),
8340                                  errors.ECODE_NORES)
8341     if len(ial.result) != ial.required_nodes:
8342       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8343                                  " of nodes (%s), required %s" %
8344                                  (self.op.iallocator, len(ial.result),
8345                                   ial.required_nodes), errors.ECODE_FAULT)
8346     self.op.pnode = ial.result[0]
8347     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8348                  self.op.instance_name, self.op.iallocator,
8349                  utils.CommaJoin(ial.result))
8350     if ial.required_nodes == 2:
8351       self.op.snode = ial.result[1]
8352
8353   def BuildHooksEnv(self):
8354     """Build hooks env.
8355
8356     This runs on master, primary and secondary nodes of the instance.
8357
8358     """
8359     env = {
8360       "ADD_MODE": self.op.mode,
8361       }
8362     if self.op.mode == constants.INSTANCE_IMPORT:
8363       env["SRC_NODE"] = self.op.src_node
8364       env["SRC_PATH"] = self.op.src_path
8365       env["SRC_IMAGES"] = self.src_images
8366
8367     env.update(_BuildInstanceHookEnv(
8368       name=self.op.instance_name,
8369       primary_node=self.op.pnode,
8370       secondary_nodes=self.secondaries,
8371       status=self.op.start,
8372       os_type=self.op.os_type,
8373       memory=self.be_full[constants.BE_MEMORY],
8374       vcpus=self.be_full[constants.BE_VCPUS],
8375       nics=_NICListToTuple(self, self.nics),
8376       disk_template=self.op.disk_template,
8377       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8378              for d in self.disks],
8379       bep=self.be_full,
8380       hvp=self.hv_full,
8381       hypervisor_name=self.op.hypervisor,
8382       tags=self.op.tags,
8383     ))
8384
8385     return env
8386
8387   def BuildHooksNodes(self):
8388     """Build hooks nodes.
8389
8390     """
8391     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8392     return nl, nl
8393
8394   def _ReadExportInfo(self):
8395     """Reads the export information from disk.
8396
8397     It will override the opcode source node and path with the actual
8398     information, if these two were not specified before.
8399
8400     @return: the export information
8401
8402     """
8403     assert self.op.mode == constants.INSTANCE_IMPORT
8404
8405     src_node = self.op.src_node
8406     src_path = self.op.src_path
8407
8408     if src_node is None:
8409       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8410       exp_list = self.rpc.call_export_list(locked_nodes)
8411       found = False
8412       for node in exp_list:
8413         if exp_list[node].fail_msg:
8414           continue
8415         if src_path in exp_list[node].payload:
8416           found = True
8417           self.op.src_node = src_node = node
8418           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8419                                                        src_path)
8420           break
8421       if not found:
8422         raise errors.OpPrereqError("No export found for relative path %s" %
8423                                     src_path, errors.ECODE_INVAL)
8424
8425     _CheckNodeOnline(self, src_node)
8426     result = self.rpc.call_export_info(src_node, src_path)
8427     result.Raise("No export or invalid export found in dir %s" % src_path)
8428
8429     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8430     if not export_info.has_section(constants.INISECT_EXP):
8431       raise errors.ProgrammerError("Corrupted export config",
8432                                    errors.ECODE_ENVIRON)
8433
8434     ei_version = export_info.get(constants.INISECT_EXP, "version")
8435     if (int(ei_version) != constants.EXPORT_VERSION):
8436       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8437                                  (ei_version, constants.EXPORT_VERSION),
8438                                  errors.ECODE_ENVIRON)
8439     return export_info
8440
8441   def _ReadExportParams(self, einfo):
8442     """Use export parameters as defaults.
8443
8444     In case the opcode doesn't specify (as in override) some instance
8445     parameters, then try to use them from the export information, if
8446     that declares them.
8447
8448     """
8449     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8450
8451     if self.op.disk_template is None:
8452       if einfo.has_option(constants.INISECT_INS, "disk_template"):
8453         self.op.disk_template = einfo.get(constants.INISECT_INS,
8454                                           "disk_template")
8455       else:
8456         raise errors.OpPrereqError("No disk template specified and the export"
8457                                    " is missing the disk_template information",
8458                                    errors.ECODE_INVAL)
8459
8460     if not self.op.disks:
8461       if einfo.has_option(constants.INISECT_INS, "disk_count"):
8462         disks = []
8463         # TODO: import the disk iv_name too
8464         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8465           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8466           disks.append({constants.IDISK_SIZE: disk_sz})
8467         self.op.disks = disks
8468       else:
8469         raise errors.OpPrereqError("No disk info specified and the export"
8470                                    " is missing the disk information",
8471                                    errors.ECODE_INVAL)
8472
8473     if (not self.op.nics and
8474         einfo.has_option(constants.INISECT_INS, "nic_count")):
8475       nics = []
8476       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8477         ndict = {}
8478         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8479           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8480           ndict[name] = v
8481         nics.append(ndict)
8482       self.op.nics = nics
8483
8484     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8485       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8486
8487     if (self.op.hypervisor is None and
8488         einfo.has_option(constants.INISECT_INS, "hypervisor")):
8489       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8490
8491     if einfo.has_section(constants.INISECT_HYP):
8492       # use the export parameters but do not override the ones
8493       # specified by the user
8494       for name, value in einfo.items(constants.INISECT_HYP):
8495         if name not in self.op.hvparams:
8496           self.op.hvparams[name] = value
8497
8498     if einfo.has_section(constants.INISECT_BEP):
8499       # use the parameters, without overriding
8500       for name, value in einfo.items(constants.INISECT_BEP):
8501         if name not in self.op.beparams:
8502           self.op.beparams[name] = value
8503     else:
8504       # try to read the parameters old style, from the main section
8505       for name in constants.BES_PARAMETERS:
8506         if (name not in self.op.beparams and
8507             einfo.has_option(constants.INISECT_INS, name)):
8508           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8509
8510     if einfo.has_section(constants.INISECT_OSP):
8511       # use the parameters, without overriding
8512       for name, value in einfo.items(constants.INISECT_OSP):
8513         if name not in self.op.osparams:
8514           self.op.osparams[name] = value
8515
8516   def _RevertToDefaults(self, cluster):
8517     """Revert the instance parameters to the default values.
8518
8519     """
8520     # hvparams
8521     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8522     for name in self.op.hvparams.keys():
8523       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8524         del self.op.hvparams[name]
8525     # beparams
8526     be_defs = cluster.SimpleFillBE({})
8527     for name in self.op.beparams.keys():
8528       if name in be_defs and be_defs[name] == self.op.beparams[name]:
8529         del self.op.beparams[name]
8530     # nic params
8531     nic_defs = cluster.SimpleFillNIC({})
8532     for nic in self.op.nics:
8533       for name in constants.NICS_PARAMETERS:
8534         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8535           del nic[name]
8536     # osparams
8537     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8538     for name in self.op.osparams.keys():
8539       if name in os_defs and os_defs[name] == self.op.osparams[name]:
8540         del self.op.osparams[name]
8541
8542   def _CalculateFileStorageDir(self):
8543     """Calculate final instance file storage dir.
8544
8545     """
8546     # file storage dir calculation/check
8547     self.instance_file_storage_dir = None
8548     if self.op.disk_template in constants.DTS_FILEBASED:
8549       # build the full file storage dir path
8550       joinargs = []
8551
8552       if self.op.disk_template == constants.DT_SHARED_FILE:
8553         get_fsd_fn = self.cfg.GetSharedFileStorageDir
8554       else:
8555         get_fsd_fn = self.cfg.GetFileStorageDir
8556
8557       cfg_storagedir = get_fsd_fn()
8558       if not cfg_storagedir:
8559         raise errors.OpPrereqError("Cluster file storage dir not defined")
8560       joinargs.append(cfg_storagedir)
8561
8562       if self.op.file_storage_dir is not None:
8563         joinargs.append(self.op.file_storage_dir)
8564
8565       joinargs.append(self.op.instance_name)
8566
8567       # pylint: disable=W0142
8568       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8569
8570   def CheckPrereq(self):
8571     """Check prerequisites.
8572
8573     """
8574     self._CalculateFileStorageDir()
8575
8576     if self.op.mode == constants.INSTANCE_IMPORT:
8577       export_info = self._ReadExportInfo()
8578       self._ReadExportParams(export_info)
8579
8580     if (not self.cfg.GetVGName() and
8581         self.op.disk_template not in constants.DTS_NOT_LVM):
8582       raise errors.OpPrereqError("Cluster does not support lvm-based"
8583                                  " instances", errors.ECODE_STATE)
8584
8585     if self.op.hypervisor is None:
8586       self.op.hypervisor = self.cfg.GetHypervisorType()
8587
8588     cluster = self.cfg.GetClusterInfo()
8589     enabled_hvs = cluster.enabled_hypervisors
8590     if self.op.hypervisor not in enabled_hvs:
8591       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8592                                  " cluster (%s)" % (self.op.hypervisor,
8593                                   ",".join(enabled_hvs)),
8594                                  errors.ECODE_STATE)
8595
8596     # Check tag validity
8597     for tag in self.op.tags:
8598       objects.TaggableObject.ValidateTag(tag)
8599
8600     # check hypervisor parameter syntax (locally)
8601     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8602     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8603                                       self.op.hvparams)
8604     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8605     hv_type.CheckParameterSyntax(filled_hvp)
8606     self.hv_full = filled_hvp
8607     # check that we don't specify global parameters on an instance
8608     _CheckGlobalHvParams(self.op.hvparams)
8609
8610     # fill and remember the beparams dict
8611     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8612     self.be_full = cluster.SimpleFillBE(self.op.beparams)
8613
8614     # build os parameters
8615     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8616
8617     # now that hvp/bep are in final format, let's reset to defaults,
8618     # if told to do so
8619     if self.op.identify_defaults:
8620       self._RevertToDefaults(cluster)
8621
8622     # NIC buildup
8623     self.nics = []
8624     for idx, nic in enumerate(self.op.nics):
8625       nic_mode_req = nic.get(constants.INIC_MODE, None)
8626       nic_mode = nic_mode_req
8627       if nic_mode is None:
8628         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8629
8630       # in routed mode, for the first nic, the default ip is 'auto'
8631       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8632         default_ip_mode = constants.VALUE_AUTO
8633       else:
8634         default_ip_mode = constants.VALUE_NONE
8635
8636       # ip validity checks
8637       ip = nic.get(constants.INIC_IP, default_ip_mode)
8638       if ip is None or ip.lower() == constants.VALUE_NONE:
8639         nic_ip = None
8640       elif ip.lower() == constants.VALUE_AUTO:
8641         if not self.op.name_check:
8642           raise errors.OpPrereqError("IP address set to auto but name checks"
8643                                      " have been skipped",
8644                                      errors.ECODE_INVAL)
8645         nic_ip = self.hostname1.ip
8646       else:
8647         if not netutils.IPAddress.IsValid(ip):
8648           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8649                                      errors.ECODE_INVAL)
8650         nic_ip = ip
8651
8652       # TODO: check the ip address for uniqueness
8653       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8654         raise errors.OpPrereqError("Routed nic mode requires an ip address",
8655                                    errors.ECODE_INVAL)
8656
8657       # MAC address verification
8658       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8659       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8660         mac = utils.NormalizeAndValidateMac(mac)
8661
8662         try:
8663           self.cfg.ReserveMAC(mac, self.proc.GetECId())
8664         except errors.ReservationError:
8665           raise errors.OpPrereqError("MAC address %s already in use"
8666                                      " in cluster" % mac,
8667                                      errors.ECODE_NOTUNIQUE)
8668
8669       #  Build nic parameters
8670       link = nic.get(constants.INIC_LINK, None)
8671       nicparams = {}
8672       if nic_mode_req:
8673         nicparams[constants.NIC_MODE] = nic_mode_req
8674       if link:
8675         nicparams[constants.NIC_LINK] = link
8676
8677       check_params = cluster.SimpleFillNIC(nicparams)
8678       objects.NIC.CheckParameterSyntax(check_params)
8679       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8680
8681     # disk checks/pre-build
8682     default_vg = self.cfg.GetVGName()
8683     self.disks = []
8684     for disk in self.op.disks:
8685       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8686       if mode not in constants.DISK_ACCESS_SET:
8687         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8688                                    mode, errors.ECODE_INVAL)
8689       size = disk.get(constants.IDISK_SIZE, None)
8690       if size is None:
8691         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8692       try:
8693         size = int(size)
8694       except (TypeError, ValueError):
8695         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8696                                    errors.ECODE_INVAL)
8697
8698       data_vg = disk.get(constants.IDISK_VG, default_vg)
8699       new_disk = {
8700         constants.IDISK_SIZE: size,
8701         constants.IDISK_MODE: mode,
8702         constants.IDISK_VG: data_vg,
8703         constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8704         }
8705       if constants.IDISK_ADOPT in disk:
8706         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8707       self.disks.append(new_disk)
8708
8709     if self.op.mode == constants.INSTANCE_IMPORT:
8710
8711       # Check that the new instance doesn't have less disks than the export
8712       instance_disks = len(self.disks)
8713       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8714       if instance_disks < export_disks:
8715         raise errors.OpPrereqError("Not enough disks to import."
8716                                    " (instance: %d, export: %d)" %
8717                                    (instance_disks, export_disks),
8718                                    errors.ECODE_INVAL)
8719
8720       disk_images = []
8721       for idx in range(export_disks):
8722         option = "disk%d_dump" % idx
8723         if export_info.has_option(constants.INISECT_INS, option):
8724           # FIXME: are the old os-es, disk sizes, etc. useful?
8725           export_name = export_info.get(constants.INISECT_INS, option)
8726           image = utils.PathJoin(self.op.src_path, export_name)
8727           disk_images.append(image)
8728         else:
8729           disk_images.append(False)
8730
8731       self.src_images = disk_images
8732
8733       old_name = export_info.get(constants.INISECT_INS, "name")
8734       try:
8735         exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8736       except (TypeError, ValueError), err:
8737         raise errors.OpPrereqError("Invalid export file, nic_count is not"
8738                                    " an integer: %s" % str(err),
8739                                    errors.ECODE_STATE)
8740       if self.op.instance_name == old_name:
8741         for idx, nic in enumerate(self.nics):
8742           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8743             nic_mac_ini = "nic%d_mac" % idx
8744             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8745
8746     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8747
8748     # ip ping checks (we use the same ip that was resolved in ExpandNames)
8749     if self.op.ip_check:
8750       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8751         raise errors.OpPrereqError("IP %s of instance %s already in use" %
8752                                    (self.check_ip, self.op.instance_name),
8753                                    errors.ECODE_NOTUNIQUE)
8754
8755     #### mac address generation
8756     # By generating here the mac address both the allocator and the hooks get
8757     # the real final mac address rather than the 'auto' or 'generate' value.
8758     # There is a race condition between the generation and the instance object
8759     # creation, which means that we know the mac is valid now, but we're not
8760     # sure it will be when we actually add the instance. If things go bad
8761     # adding the instance will abort because of a duplicate mac, and the
8762     # creation job will fail.
8763     for nic in self.nics:
8764       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8765         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8766
8767     #### allocator run
8768
8769     if self.op.iallocator is not None:
8770       self._RunAllocator()
8771
8772     #### node related checks
8773
8774     # check primary node
8775     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8776     assert self.pnode is not None, \
8777       "Cannot retrieve locked node %s" % self.op.pnode
8778     if pnode.offline:
8779       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8780                                  pnode.name, errors.ECODE_STATE)
8781     if pnode.drained:
8782       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8783                                  pnode.name, errors.ECODE_STATE)
8784     if not pnode.vm_capable:
8785       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8786                                  " '%s'" % pnode.name, errors.ECODE_STATE)
8787
8788     self.secondaries = []
8789
8790     # mirror node verification
8791     if self.op.disk_template in constants.DTS_INT_MIRROR:
8792       if self.op.snode == pnode.name:
8793         raise errors.OpPrereqError("The secondary node cannot be the"
8794                                    " primary node", errors.ECODE_INVAL)
8795       _CheckNodeOnline(self, self.op.snode)
8796       _CheckNodeNotDrained(self, self.op.snode)
8797       _CheckNodeVmCapable(self, self.op.snode)
8798       self.secondaries.append(self.op.snode)
8799
8800     nodenames = [pnode.name] + self.secondaries
8801
8802     if not self.adopt_disks:
8803       # Check lv size requirements, if not adopting
8804       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8805       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8806
8807     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8808       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8809                                 disk[constants.IDISK_ADOPT])
8810                      for disk in self.disks])
8811       if len(all_lvs) != len(self.disks):
8812         raise errors.OpPrereqError("Duplicate volume names given for adoption",
8813                                    errors.ECODE_INVAL)
8814       for lv_name in all_lvs:
8815         try:
8816           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8817           # to ReserveLV uses the same syntax
8818           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8819         except errors.ReservationError:
8820           raise errors.OpPrereqError("LV named %s used by another instance" %
8821                                      lv_name, errors.ECODE_NOTUNIQUE)
8822
8823       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8824       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8825
8826       node_lvs = self.rpc.call_lv_list([pnode.name],
8827                                        vg_names.payload.keys())[pnode.name]
8828       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8829       node_lvs = node_lvs.payload
8830
8831       delta = all_lvs.difference(node_lvs.keys())
8832       if delta:
8833         raise errors.OpPrereqError("Missing logical volume(s): %s" %
8834                                    utils.CommaJoin(delta),
8835                                    errors.ECODE_INVAL)
8836       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8837       if online_lvs:
8838         raise errors.OpPrereqError("Online logical volumes found, cannot"
8839                                    " adopt: %s" % utils.CommaJoin(online_lvs),
8840                                    errors.ECODE_STATE)
8841       # update the size of disk based on what is found
8842       for dsk in self.disks:
8843         dsk[constants.IDISK_SIZE] = \
8844           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8845                                         dsk[constants.IDISK_ADOPT])][0]))
8846
8847     elif self.op.disk_template == constants.DT_BLOCK:
8848       # Normalize and de-duplicate device paths
8849       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8850                        for disk in self.disks])
8851       if len(all_disks) != len(self.disks):
8852         raise errors.OpPrereqError("Duplicate disk names given for adoption",
8853                                    errors.ECODE_INVAL)
8854       baddisks = [d for d in all_disks
8855                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8856       if baddisks:
8857         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8858                                    " cannot be adopted" %
8859                                    (", ".join(baddisks),
8860                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
8861                                    errors.ECODE_INVAL)
8862
8863       node_disks = self.rpc.call_bdev_sizes([pnode.name],
8864                                             list(all_disks))[pnode.name]
8865       node_disks.Raise("Cannot get block device information from node %s" %
8866                        pnode.name)
8867       node_disks = node_disks.payload
8868       delta = all_disks.difference(node_disks.keys())
8869       if delta:
8870         raise errors.OpPrereqError("Missing block device(s): %s" %
8871                                    utils.CommaJoin(delta),
8872                                    errors.ECODE_INVAL)
8873       for dsk in self.disks:
8874         dsk[constants.IDISK_SIZE] = \
8875           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8876
8877     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8878
8879     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8880     # check OS parameters (remotely)
8881     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8882
8883     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8884
8885     # memory check on primary node
8886     if self.op.start:
8887       _CheckNodeFreeMemory(self, self.pnode.name,
8888                            "creating instance %s" % self.op.instance_name,
8889                            self.be_full[constants.BE_MEMORY],
8890                            self.op.hypervisor)
8891
8892     self.dry_run_result = list(nodenames)
8893
8894   def Exec(self, feedback_fn):
8895     """Create and add the instance to the cluster.
8896
8897     """
8898     instance = self.op.instance_name
8899     pnode_name = self.pnode.name
8900
8901     ht_kind = self.op.hypervisor
8902     if ht_kind in constants.HTS_REQ_PORT:
8903       network_port = self.cfg.AllocatePort()
8904     else:
8905       network_port = None
8906
8907     disks = _GenerateDiskTemplate(self,
8908                                   self.op.disk_template,
8909                                   instance, pnode_name,
8910                                   self.secondaries,
8911                                   self.disks,
8912                                   self.instance_file_storage_dir,
8913                                   self.op.file_driver,
8914                                   0,
8915                                   feedback_fn)
8916
8917     iobj = objects.Instance(name=instance, os=self.op.os_type,
8918                             primary_node=pnode_name,
8919                             nics=self.nics, disks=disks,
8920                             disk_template=self.op.disk_template,
8921                             admin_up=False,
8922                             network_port=network_port,
8923                             beparams=self.op.beparams,
8924                             hvparams=self.op.hvparams,
8925                             hypervisor=self.op.hypervisor,
8926                             osparams=self.op.osparams,
8927                             )
8928
8929     if self.op.tags:
8930       for tag in self.op.tags:
8931         iobj.AddTag(tag)
8932
8933     if self.adopt_disks:
8934       if self.op.disk_template == constants.DT_PLAIN:
8935         # rename LVs to the newly-generated names; we need to construct
8936         # 'fake' LV disks with the old data, plus the new unique_id
8937         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8938         rename_to = []
8939         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8940           rename_to.append(t_dsk.logical_id)
8941           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8942           self.cfg.SetDiskID(t_dsk, pnode_name)
8943         result = self.rpc.call_blockdev_rename(pnode_name,
8944                                                zip(tmp_disks, rename_to))
8945         result.Raise("Failed to rename adoped LVs")
8946     else:
8947       feedback_fn("* creating instance disks...")
8948       try:
8949         _CreateDisks(self, iobj)
8950       except errors.OpExecError:
8951         self.LogWarning("Device creation failed, reverting...")
8952         try:
8953           _RemoveDisks(self, iobj)
8954         finally:
8955           self.cfg.ReleaseDRBDMinors(instance)
8956           raise
8957
8958     feedback_fn("adding instance %s to cluster config" % instance)
8959
8960     self.cfg.AddInstance(iobj, self.proc.GetECId())
8961
8962     # Declare that we don't want to remove the instance lock anymore, as we've
8963     # added the instance to the config
8964     del self.remove_locks[locking.LEVEL_INSTANCE]
8965
8966     if self.op.mode == constants.INSTANCE_IMPORT:
8967       # Release unused nodes
8968       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8969     else:
8970       # Release all nodes
8971       _ReleaseLocks(self, locking.LEVEL_NODE)
8972
8973     disk_abort = False
8974     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8975       feedback_fn("* wiping instance disks...")
8976       try:
8977         _WipeDisks(self, iobj)
8978       except errors.OpExecError, err:
8979         logging.exception("Wiping disks failed")
8980         self.LogWarning("Wiping instance disks failed (%s)", err)
8981         disk_abort = True
8982
8983     if disk_abort:
8984       # Something is already wrong with the disks, don't do anything else
8985       pass
8986     elif self.op.wait_for_sync:
8987       disk_abort = not _WaitForSync(self, iobj)
8988     elif iobj.disk_template in constants.DTS_INT_MIRROR:
8989       # make sure the disks are not degraded (still sync-ing is ok)
8990       feedback_fn("* checking mirrors status")
8991       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8992     else:
8993       disk_abort = False
8994
8995     if disk_abort:
8996       _RemoveDisks(self, iobj)
8997       self.cfg.RemoveInstance(iobj.name)
8998       # Make sure the instance lock gets removed
8999       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9000       raise errors.OpExecError("There are some degraded disks for"
9001                                " this instance")
9002
9003     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9004       if self.op.mode == constants.INSTANCE_CREATE:
9005         if not self.op.no_install:
9006           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9007                         not self.op.wait_for_sync)
9008           if pause_sync:
9009             feedback_fn("* pausing disk sync to install instance OS")
9010             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9011                                                               iobj.disks, True)
9012             for idx, success in enumerate(result.payload):
9013               if not success:
9014                 logging.warn("pause-sync of instance %s for disk %d failed",
9015                              instance, idx)
9016
9017           feedback_fn("* running the instance OS create scripts...")
9018           # FIXME: pass debug option from opcode to backend
9019           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
9020                                                  self.op.debug_level)
9021           if pause_sync:
9022             feedback_fn("* resuming disk sync")
9023             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9024                                                               iobj.disks, False)
9025             for idx, success in enumerate(result.payload):
9026               if not success:
9027                 logging.warn("resume-sync of instance %s for disk %d failed",
9028                              instance, idx)
9029
9030           result.Raise("Could not add os for instance %s"
9031                        " on node %s" % (instance, pnode_name))
9032
9033       elif self.op.mode == constants.INSTANCE_IMPORT:
9034         feedback_fn("* running the instance OS import scripts...")
9035
9036         transfers = []
9037
9038         for idx, image in enumerate(self.src_images):
9039           if not image:
9040             continue
9041
9042           # FIXME: pass debug option from opcode to backend
9043           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9044                                              constants.IEIO_FILE, (image, ),
9045                                              constants.IEIO_SCRIPT,
9046                                              (iobj.disks[idx], idx),
9047                                              None)
9048           transfers.append(dt)
9049
9050         import_result = \
9051           masterd.instance.TransferInstanceData(self, feedback_fn,
9052                                                 self.op.src_node, pnode_name,
9053                                                 self.pnode.secondary_ip,
9054                                                 iobj, transfers)
9055         if not compat.all(import_result):
9056           self.LogWarning("Some disks for instance %s on node %s were not"
9057                           " imported successfully" % (instance, pnode_name))
9058
9059       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9060         feedback_fn("* preparing remote import...")
9061         # The source cluster will stop the instance before attempting to make a
9062         # connection. In some cases stopping an instance can take a long time,
9063         # hence the shutdown timeout is added to the connection timeout.
9064         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9065                            self.op.source_shutdown_timeout)
9066         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9067
9068         assert iobj.primary_node == self.pnode.name
9069         disk_results = \
9070           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9071                                         self.source_x509_ca,
9072                                         self._cds, timeouts)
9073         if not compat.all(disk_results):
9074           # TODO: Should the instance still be started, even if some disks
9075           # failed to import (valid for local imports, too)?
9076           self.LogWarning("Some disks for instance %s on node %s were not"
9077                           " imported successfully" % (instance, pnode_name))
9078
9079         # Run rename script on newly imported instance
9080         assert iobj.name == instance
9081         feedback_fn("Running rename script for %s" % instance)
9082         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9083                                                    self.source_instance_name,
9084                                                    self.op.debug_level)
9085         if result.fail_msg:
9086           self.LogWarning("Failed to run rename script for %s on node"
9087                           " %s: %s" % (instance, pnode_name, result.fail_msg))
9088
9089       else:
9090         # also checked in the prereq part
9091         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9092                                      % self.op.mode)
9093
9094     if self.op.start:
9095       iobj.admin_up = True
9096       self.cfg.Update(iobj, feedback_fn)
9097       logging.info("Starting instance %s on node %s", instance, pnode_name)
9098       feedback_fn("* starting instance...")
9099       result = self.rpc.call_instance_start(pnode_name, iobj,
9100                                             None, None, False)
9101       result.Raise("Could not start instance")
9102
9103     return list(iobj.all_nodes)
9104
9105
9106 class LUInstanceConsole(NoHooksLU):
9107   """Connect to an instance's console.
9108
9109   This is somewhat special in that it returns the command line that
9110   you need to run on the master node in order to connect to the
9111   console.
9112
9113   """
9114   REQ_BGL = False
9115
9116   def ExpandNames(self):
9117     self._ExpandAndLockInstance()
9118
9119   def CheckPrereq(self):
9120     """Check prerequisites.
9121
9122     This checks that the instance is in the cluster.
9123
9124     """
9125     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9126     assert self.instance is not None, \
9127       "Cannot retrieve locked instance %s" % self.op.instance_name
9128     _CheckNodeOnline(self, self.instance.primary_node)
9129
9130   def Exec(self, feedback_fn):
9131     """Connect to the console of an instance
9132
9133     """
9134     instance = self.instance
9135     node = instance.primary_node
9136
9137     node_insts = self.rpc.call_instance_list([node],
9138                                              [instance.hypervisor])[node]
9139     node_insts.Raise("Can't get node information from %s" % node)
9140
9141     if instance.name not in node_insts.payload:
9142       if instance.admin_up:
9143         state = constants.INSTST_ERRORDOWN
9144       else:
9145         state = constants.INSTST_ADMINDOWN
9146       raise errors.OpExecError("Instance %s is not running (state %s)" %
9147                                (instance.name, state))
9148
9149     logging.debug("Connecting to console of %s on %s", instance.name, node)
9150
9151     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9152
9153
9154 def _GetInstanceConsole(cluster, instance):
9155   """Returns console information for an instance.
9156
9157   @type cluster: L{objects.Cluster}
9158   @type instance: L{objects.Instance}
9159   @rtype: dict
9160
9161   """
9162   hyper = hypervisor.GetHypervisor(instance.hypervisor)
9163   # beparams and hvparams are passed separately, to avoid editing the
9164   # instance and then saving the defaults in the instance itself.
9165   hvparams = cluster.FillHV(instance)
9166   beparams = cluster.FillBE(instance)
9167   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9168
9169   assert console.instance == instance.name
9170   assert console.Validate()
9171
9172   return console.ToDict()
9173
9174
9175 class LUInstanceReplaceDisks(LogicalUnit):
9176   """Replace the disks of an instance.
9177
9178   """
9179   HPATH = "mirrors-replace"
9180   HTYPE = constants.HTYPE_INSTANCE
9181   REQ_BGL = False
9182
9183   def CheckArguments(self):
9184     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9185                                   self.op.iallocator)
9186
9187   def ExpandNames(self):
9188     self._ExpandAndLockInstance()
9189
9190     assert locking.LEVEL_NODE not in self.needed_locks
9191     assert locking.LEVEL_NODEGROUP not in self.needed_locks
9192
9193     assert self.op.iallocator is None or self.op.remote_node is None, \
9194       "Conflicting options"
9195
9196     if self.op.remote_node is not None:
9197       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9198
9199       # Warning: do not remove the locking of the new secondary here
9200       # unless DRBD8.AddChildren is changed to work in parallel;
9201       # currently it doesn't since parallel invocations of
9202       # FindUnusedMinor will conflict
9203       self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9204       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9205     else:
9206       self.needed_locks[locking.LEVEL_NODE] = []
9207       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9208
9209       if self.op.iallocator is not None:
9210         # iallocator will select a new node in the same group
9211         self.needed_locks[locking.LEVEL_NODEGROUP] = []
9212
9213     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9214                                    self.op.iallocator, self.op.remote_node,
9215                                    self.op.disks, False, self.op.early_release)
9216
9217     self.tasklets = [self.replacer]
9218
9219   def DeclareLocks(self, level):
9220     if level == locking.LEVEL_NODEGROUP:
9221       assert self.op.remote_node is None
9222       assert self.op.iallocator is not None
9223       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9224
9225       self.share_locks[locking.LEVEL_NODEGROUP] = 1
9226       self.needed_locks[locking.LEVEL_NODEGROUP] = \
9227         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9228
9229     elif level == locking.LEVEL_NODE:
9230       if self.op.iallocator is not None:
9231         assert self.op.remote_node is None
9232         assert not self.needed_locks[locking.LEVEL_NODE]
9233
9234         # Lock member nodes of all locked groups
9235         self.needed_locks[locking.LEVEL_NODE] = [node_name
9236           for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9237           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9238       else:
9239         self._LockInstancesNodes()
9240
9241   def BuildHooksEnv(self):
9242     """Build hooks env.
9243
9244     This runs on the master, the primary and all the secondaries.
9245
9246     """
9247     instance = self.replacer.instance
9248     env = {
9249       "MODE": self.op.mode,
9250       "NEW_SECONDARY": self.op.remote_node,
9251       "OLD_SECONDARY": instance.secondary_nodes[0],
9252       }
9253     env.update(_BuildInstanceHookEnvByObject(self, instance))
9254     return env
9255
9256   def BuildHooksNodes(self):
9257     """Build hooks nodes.
9258
9259     """
9260     instance = self.replacer.instance
9261     nl = [
9262       self.cfg.GetMasterNode(),
9263       instance.primary_node,
9264       ]
9265     if self.op.remote_node is not None:
9266       nl.append(self.op.remote_node)
9267     return nl, nl
9268
9269   def CheckPrereq(self):
9270     """Check prerequisites.
9271
9272     """
9273     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9274             self.op.iallocator is None)
9275
9276     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9277     if owned_groups:
9278       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9279
9280     return LogicalUnit.CheckPrereq(self)
9281
9282
9283 class TLReplaceDisks(Tasklet):
9284   """Replaces disks for an instance.
9285
9286   Note: Locking is not within the scope of this class.
9287
9288   """
9289   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9290                disks, delay_iallocator, early_release):
9291     """Initializes this class.
9292
9293     """
9294     Tasklet.__init__(self, lu)
9295
9296     # Parameters
9297     self.instance_name = instance_name
9298     self.mode = mode
9299     self.iallocator_name = iallocator_name
9300     self.remote_node = remote_node
9301     self.disks = disks
9302     self.delay_iallocator = delay_iallocator
9303     self.early_release = early_release
9304
9305     # Runtime data
9306     self.instance = None
9307     self.new_node = None
9308     self.target_node = None
9309     self.other_node = None
9310     self.remote_node_info = None
9311     self.node_secondary_ip = None
9312
9313   @staticmethod
9314   def CheckArguments(mode, remote_node, iallocator):
9315     """Helper function for users of this class.
9316
9317     """
9318     # check for valid parameter combination
9319     if mode == constants.REPLACE_DISK_CHG:
9320       if remote_node is None and iallocator is None:
9321         raise errors.OpPrereqError("When changing the secondary either an"
9322                                    " iallocator script must be used or the"
9323                                    " new node given", errors.ECODE_INVAL)
9324
9325       if remote_node is not None and iallocator is not None:
9326         raise errors.OpPrereqError("Give either the iallocator or the new"
9327                                    " secondary, not both", errors.ECODE_INVAL)
9328
9329     elif remote_node is not None or iallocator is not None:
9330       # Not replacing the secondary
9331       raise errors.OpPrereqError("The iallocator and new node options can"
9332                                  " only be used when changing the"
9333                                  " secondary node", errors.ECODE_INVAL)
9334
9335   @staticmethod
9336   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9337     """Compute a new secondary node using an IAllocator.
9338
9339     """
9340     ial = IAllocator(lu.cfg, lu.rpc,
9341                      mode=constants.IALLOCATOR_MODE_RELOC,
9342                      name=instance_name,
9343                      relocate_from=list(relocate_from))
9344
9345     ial.Run(iallocator_name)
9346
9347     if not ial.success:
9348       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9349                                  " %s" % (iallocator_name, ial.info),
9350                                  errors.ECODE_NORES)
9351
9352     if len(ial.result) != ial.required_nodes:
9353       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9354                                  " of nodes (%s), required %s" %
9355                                  (iallocator_name,
9356                                   len(ial.result), ial.required_nodes),
9357                                  errors.ECODE_FAULT)
9358
9359     remote_node_name = ial.result[0]
9360
9361     lu.LogInfo("Selected new secondary for instance '%s': %s",
9362                instance_name, remote_node_name)
9363
9364     return remote_node_name
9365
9366   def _FindFaultyDisks(self, node_name):
9367     """Wrapper for L{_FindFaultyInstanceDisks}.
9368
9369     """
9370     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9371                                     node_name, True)
9372
9373   def _CheckDisksActivated(self, instance):
9374     """Checks if the instance disks are activated.
9375
9376     @param instance: The instance to check disks
9377     @return: True if they are activated, False otherwise
9378
9379     """
9380     nodes = instance.all_nodes
9381
9382     for idx, dev in enumerate(instance.disks):
9383       for node in nodes:
9384         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9385         self.cfg.SetDiskID(dev, node)
9386
9387         result = self.rpc.call_blockdev_find(node, dev)
9388
9389         if result.offline:
9390           continue
9391         elif result.fail_msg or not result.payload:
9392           return False
9393
9394     return True
9395
9396   def CheckPrereq(self):
9397     """Check prerequisites.
9398
9399     This checks that the instance is in the cluster.
9400
9401     """
9402     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9403     assert instance is not None, \
9404       "Cannot retrieve locked instance %s" % self.instance_name
9405
9406     if instance.disk_template != constants.DT_DRBD8:
9407       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9408                                  " instances", errors.ECODE_INVAL)
9409
9410     if len(instance.secondary_nodes) != 1:
9411       raise errors.OpPrereqError("The instance has a strange layout,"
9412                                  " expected one secondary but found %d" %
9413                                  len(instance.secondary_nodes),
9414                                  errors.ECODE_FAULT)
9415
9416     if not self.delay_iallocator:
9417       self._CheckPrereq2()
9418
9419   def _CheckPrereq2(self):
9420     """Check prerequisites, second part.
9421
9422     This function should always be part of CheckPrereq. It was separated and is
9423     now called from Exec because during node evacuation iallocator was only
9424     called with an unmodified cluster model, not taking planned changes into
9425     account.
9426
9427     """
9428     instance = self.instance
9429     secondary_node = instance.secondary_nodes[0]
9430
9431     if self.iallocator_name is None:
9432       remote_node = self.remote_node
9433     else:
9434       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9435                                        instance.name, instance.secondary_nodes)
9436
9437     if remote_node is None:
9438       self.remote_node_info = None
9439     else:
9440       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9441              "Remote node '%s' is not locked" % remote_node
9442
9443       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9444       assert self.remote_node_info is not None, \
9445         "Cannot retrieve locked node %s" % remote_node
9446
9447     if remote_node == self.instance.primary_node:
9448       raise errors.OpPrereqError("The specified node is the primary node of"
9449                                  " the instance", errors.ECODE_INVAL)
9450
9451     if remote_node == secondary_node:
9452       raise errors.OpPrereqError("The specified node is already the"
9453                                  " secondary node of the instance",
9454                                  errors.ECODE_INVAL)
9455
9456     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9457                                     constants.REPLACE_DISK_CHG):
9458       raise errors.OpPrereqError("Cannot specify disks to be replaced",
9459                                  errors.ECODE_INVAL)
9460
9461     if self.mode == constants.REPLACE_DISK_AUTO:
9462       if not self._CheckDisksActivated(instance):
9463         raise errors.OpPrereqError("Please run activate-disks on instance %s"
9464                                    " first" % self.instance_name,
9465                                    errors.ECODE_STATE)
9466       faulty_primary = self._FindFaultyDisks(instance.primary_node)
9467       faulty_secondary = self._FindFaultyDisks(secondary_node)
9468
9469       if faulty_primary and faulty_secondary:
9470         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9471                                    " one node and can not be repaired"
9472                                    " automatically" % self.instance_name,
9473                                    errors.ECODE_STATE)
9474
9475       if faulty_primary:
9476         self.disks = faulty_primary
9477         self.target_node = instance.primary_node
9478         self.other_node = secondary_node
9479         check_nodes = [self.target_node, self.other_node]
9480       elif faulty_secondary:
9481         self.disks = faulty_secondary
9482         self.target_node = secondary_node
9483         self.other_node = instance.primary_node
9484         check_nodes = [self.target_node, self.other_node]
9485       else:
9486         self.disks = []
9487         check_nodes = []
9488
9489     else:
9490       # Non-automatic modes
9491       if self.mode == constants.REPLACE_DISK_PRI:
9492         self.target_node = instance.primary_node
9493         self.other_node = secondary_node
9494         check_nodes = [self.target_node, self.other_node]
9495
9496       elif self.mode == constants.REPLACE_DISK_SEC:
9497         self.target_node = secondary_node
9498         self.other_node = instance.primary_node
9499         check_nodes = [self.target_node, self.other_node]
9500
9501       elif self.mode == constants.REPLACE_DISK_CHG:
9502         self.new_node = remote_node
9503         self.other_node = instance.primary_node
9504         self.target_node = secondary_node
9505         check_nodes = [self.new_node, self.other_node]
9506
9507         _CheckNodeNotDrained(self.lu, remote_node)
9508         _CheckNodeVmCapable(self.lu, remote_node)
9509
9510         old_node_info = self.cfg.GetNodeInfo(secondary_node)
9511         assert old_node_info is not None
9512         if old_node_info.offline and not self.early_release:
9513           # doesn't make sense to delay the release
9514           self.early_release = True
9515           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9516                           " early-release mode", secondary_node)
9517
9518       else:
9519         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9520                                      self.mode)
9521
9522       # If not specified all disks should be replaced
9523       if not self.disks:
9524         self.disks = range(len(self.instance.disks))
9525
9526     for node in check_nodes:
9527       _CheckNodeOnline(self.lu, node)
9528
9529     touched_nodes = frozenset(node_name for node_name in [self.new_node,
9530                                                           self.other_node,
9531                                                           self.target_node]
9532                               if node_name is not None)
9533
9534     # Release unneeded node locks
9535     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9536
9537     # Release any owned node group
9538     if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9539       _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9540
9541     # Check whether disks are valid
9542     for disk_idx in self.disks:
9543       instance.FindDisk(disk_idx)
9544
9545     # Get secondary node IP addresses
9546     self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9547                                   in self.cfg.GetMultiNodeInfo(touched_nodes))
9548
9549   def Exec(self, feedback_fn):
9550     """Execute disk replacement.
9551
9552     This dispatches the disk replacement to the appropriate handler.
9553
9554     """
9555     if self.delay_iallocator:
9556       self._CheckPrereq2()
9557
9558     if __debug__:
9559       # Verify owned locks before starting operation
9560       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9561       assert set(owned_nodes) == set(self.node_secondary_ip), \
9562           ("Incorrect node locks, owning %s, expected %s" %
9563            (owned_nodes, self.node_secondary_ip.keys()))
9564
9565       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9566       assert list(owned_instances) == [self.instance_name], \
9567           "Instance '%s' not locked" % self.instance_name
9568
9569       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9570           "Should not own any node group lock at this point"
9571
9572     if not self.disks:
9573       feedback_fn("No disks need replacement")
9574       return
9575
9576     feedback_fn("Replacing disk(s) %s for %s" %
9577                 (utils.CommaJoin(self.disks), self.instance.name))
9578
9579     activate_disks = (not self.instance.admin_up)
9580
9581     # Activate the instance disks if we're replacing them on a down instance
9582     if activate_disks:
9583       _StartInstanceDisks(self.lu, self.instance, True)
9584
9585     try:
9586       # Should we replace the secondary node?
9587       if self.new_node is not None:
9588         fn = self._ExecDrbd8Secondary
9589       else:
9590         fn = self._ExecDrbd8DiskOnly
9591
9592       result = fn(feedback_fn)
9593     finally:
9594       # Deactivate the instance disks if we're replacing them on a
9595       # down instance
9596       if activate_disks:
9597         _SafeShutdownInstanceDisks(self.lu, self.instance)
9598
9599     if __debug__:
9600       # Verify owned locks
9601       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9602       nodes = frozenset(self.node_secondary_ip)
9603       assert ((self.early_release and not owned_nodes) or
9604               (not self.early_release and not (set(owned_nodes) - nodes))), \
9605         ("Not owning the correct locks, early_release=%s, owned=%r,"
9606          " nodes=%r" % (self.early_release, owned_nodes, nodes))
9607
9608     return result
9609
9610   def _CheckVolumeGroup(self, nodes):
9611     self.lu.LogInfo("Checking volume groups")
9612
9613     vgname = self.cfg.GetVGName()
9614
9615     # Make sure volume group exists on all involved nodes
9616     results = self.rpc.call_vg_list(nodes)
9617     if not results:
9618       raise errors.OpExecError("Can't list volume groups on the nodes")
9619
9620     for node in nodes:
9621       res = results[node]
9622       res.Raise("Error checking node %s" % node)
9623       if vgname not in res.payload:
9624         raise errors.OpExecError("Volume group '%s' not found on node %s" %
9625                                  (vgname, node))
9626
9627   def _CheckDisksExistence(self, nodes):
9628     # Check disk existence
9629     for idx, dev in enumerate(self.instance.disks):
9630       if idx not in self.disks:
9631         continue
9632
9633       for node in nodes:
9634         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9635         self.cfg.SetDiskID(dev, node)
9636
9637         result = self.rpc.call_blockdev_find(node, dev)
9638
9639         msg = result.fail_msg
9640         if msg or not result.payload:
9641           if not msg:
9642             msg = "disk not found"
9643           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9644                                    (idx, node, msg))
9645
9646   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9647     for idx, dev in enumerate(self.instance.disks):
9648       if idx not in self.disks:
9649         continue
9650
9651       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9652                       (idx, node_name))
9653
9654       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9655                                    ldisk=ldisk):
9656         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9657                                  " replace disks for instance %s" %
9658                                  (node_name, self.instance.name))
9659
9660   def _CreateNewStorage(self, node_name):
9661     """Create new storage on the primary or secondary node.
9662
9663     This is only used for same-node replaces, not for changing the
9664     secondary node, hence we don't want to modify the existing disk.
9665
9666     """
9667     iv_names = {}
9668
9669     for idx, dev in enumerate(self.instance.disks):
9670       if idx not in self.disks:
9671         continue
9672
9673       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9674
9675       self.cfg.SetDiskID(dev, node_name)
9676
9677       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9678       names = _GenerateUniqueNames(self.lu, lv_names)
9679
9680       vg_data = dev.children[0].logical_id[0]
9681       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9682                              logical_id=(vg_data, names[0]))
9683       vg_meta = dev.children[1].logical_id[0]
9684       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9685                              logical_id=(vg_meta, names[1]))
9686
9687       new_lvs = [lv_data, lv_meta]
9688       old_lvs = [child.Copy() for child in dev.children]
9689       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9690
9691       # we pass force_create=True to force the LVM creation
9692       for new_lv in new_lvs:
9693         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9694                         _GetInstanceInfoText(self.instance), False)
9695
9696     return iv_names
9697
9698   def _CheckDevices(self, node_name, iv_names):
9699     for name, (dev, _, _) in iv_names.iteritems():
9700       self.cfg.SetDiskID(dev, node_name)
9701
9702       result = self.rpc.call_blockdev_find(node_name, dev)
9703
9704       msg = result.fail_msg
9705       if msg or not result.payload:
9706         if not msg:
9707           msg = "disk not found"
9708         raise errors.OpExecError("Can't find DRBD device %s: %s" %
9709                                  (name, msg))
9710
9711       if result.payload.is_degraded:
9712         raise errors.OpExecError("DRBD device %s is degraded!" % name)
9713
9714   def _RemoveOldStorage(self, node_name, iv_names):
9715     for name, (_, old_lvs, _) in iv_names.iteritems():
9716       self.lu.LogInfo("Remove logical volumes for %s" % name)
9717
9718       for lv in old_lvs:
9719         self.cfg.SetDiskID(lv, node_name)
9720
9721         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9722         if msg:
9723           self.lu.LogWarning("Can't remove old LV: %s" % msg,
9724                              hint="remove unused LVs manually")
9725
9726   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9727     """Replace a disk on the primary or secondary for DRBD 8.
9728
9729     The algorithm for replace is quite complicated:
9730
9731       1. for each disk to be replaced:
9732
9733         1. create new LVs on the target node with unique names
9734         1. detach old LVs from the drbd device
9735         1. rename old LVs to name_replaced.<time_t>
9736         1. rename new LVs to old LVs
9737         1. attach the new LVs (with the old names now) to the drbd device
9738
9739       1. wait for sync across all devices
9740
9741       1. for each modified disk:
9742
9743         1. remove old LVs (which have the name name_replaces.<time_t>)
9744
9745     Failures are not very well handled.
9746
9747     """
9748     steps_total = 6
9749
9750     # Step: check device activation
9751     self.lu.LogStep(1, steps_total, "Check device existence")
9752     self._CheckDisksExistence([self.other_node, self.target_node])
9753     self._CheckVolumeGroup([self.target_node, self.other_node])
9754
9755     # Step: check other node consistency
9756     self.lu.LogStep(2, steps_total, "Check peer consistency")
9757     self._CheckDisksConsistency(self.other_node,
9758                                 self.other_node == self.instance.primary_node,
9759                                 False)
9760
9761     # Step: create new storage
9762     self.lu.LogStep(3, steps_total, "Allocate new storage")
9763     iv_names = self._CreateNewStorage(self.target_node)
9764
9765     # Step: for each lv, detach+rename*2+attach
9766     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9767     for dev, old_lvs, new_lvs in iv_names.itervalues():
9768       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9769
9770       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9771                                                      old_lvs)
9772       result.Raise("Can't detach drbd from local storage on node"
9773                    " %s for device %s" % (self.target_node, dev.iv_name))
9774       #dev.children = []
9775       #cfg.Update(instance)
9776
9777       # ok, we created the new LVs, so now we know we have the needed
9778       # storage; as such, we proceed on the target node to rename
9779       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9780       # using the assumption that logical_id == physical_id (which in
9781       # turn is the unique_id on that node)
9782
9783       # FIXME(iustin): use a better name for the replaced LVs
9784       temp_suffix = int(time.time())
9785       ren_fn = lambda d, suff: (d.physical_id[0],
9786                                 d.physical_id[1] + "_replaced-%s" % suff)
9787
9788       # Build the rename list based on what LVs exist on the node
9789       rename_old_to_new = []
9790       for to_ren in old_lvs:
9791         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9792         if not result.fail_msg and result.payload:
9793           # device exists
9794           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9795
9796       self.lu.LogInfo("Renaming the old LVs on the target node")
9797       result = self.rpc.call_blockdev_rename(self.target_node,
9798                                              rename_old_to_new)
9799       result.Raise("Can't rename old LVs on node %s" % self.target_node)
9800
9801       # Now we rename the new LVs to the old LVs
9802       self.lu.LogInfo("Renaming the new LVs on the target node")
9803       rename_new_to_old = [(new, old.physical_id)
9804                            for old, new in zip(old_lvs, new_lvs)]
9805       result = self.rpc.call_blockdev_rename(self.target_node,
9806                                              rename_new_to_old)
9807       result.Raise("Can't rename new LVs on node %s" % self.target_node)
9808
9809       # Intermediate steps of in memory modifications
9810       for old, new in zip(old_lvs, new_lvs):
9811         new.logical_id = old.logical_id
9812         self.cfg.SetDiskID(new, self.target_node)
9813
9814       # We need to modify old_lvs so that removal later removes the
9815       # right LVs, not the newly added ones; note that old_lvs is a
9816       # copy here
9817       for disk in old_lvs:
9818         disk.logical_id = ren_fn(disk, temp_suffix)
9819         self.cfg.SetDiskID(disk, self.target_node)
9820
9821       # Now that the new lvs have the old name, we can add them to the device
9822       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9823       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9824                                                   new_lvs)
9825       msg = result.fail_msg
9826       if msg:
9827         for new_lv in new_lvs:
9828           msg2 = self.rpc.call_blockdev_remove(self.target_node,
9829                                                new_lv).fail_msg
9830           if msg2:
9831             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9832                                hint=("cleanup manually the unused logical"
9833                                      "volumes"))
9834         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9835
9836     cstep = 5
9837     if self.early_release:
9838       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9839       cstep += 1
9840       self._RemoveOldStorage(self.target_node, iv_names)
9841       # WARNING: we release both node locks here, do not do other RPCs
9842       # than WaitForSync to the primary node
9843       _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9844                     names=[self.target_node, self.other_node])
9845
9846     # Wait for sync
9847     # This can fail as the old devices are degraded and _WaitForSync
9848     # does a combined result over all disks, so we don't check its return value
9849     self.lu.LogStep(cstep, steps_total, "Sync devices")
9850     cstep += 1
9851     _WaitForSync(self.lu, self.instance)
9852
9853     # Check all devices manually
9854     self._CheckDevices(self.instance.primary_node, iv_names)
9855
9856     # Step: remove old storage
9857     if not self.early_release:
9858       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9859       cstep += 1
9860       self._RemoveOldStorage(self.target_node, iv_names)
9861
9862   def _ExecDrbd8Secondary(self, feedback_fn):
9863     """Replace the secondary node for DRBD 8.
9864
9865     The algorithm for replace is quite complicated:
9866       - for all disks of the instance:
9867         - create new LVs on the new node with same names
9868         - shutdown the drbd device on the old secondary
9869         - disconnect the drbd network on the primary
9870         - create the drbd device on the new secondary
9871         - network attach the drbd on the primary, using an artifice:
9872           the drbd code for Attach() will connect to the network if it
9873           finds a device which is connected to the good local disks but
9874           not network enabled
9875       - wait for sync across all devices
9876       - remove all disks from the old secondary
9877
9878     Failures are not very well handled.
9879
9880     """
9881     steps_total = 6
9882
9883     pnode = self.instance.primary_node
9884
9885     # Step: check device activation
9886     self.lu.LogStep(1, steps_total, "Check device existence")
9887     self._CheckDisksExistence([self.instance.primary_node])
9888     self._CheckVolumeGroup([self.instance.primary_node])
9889
9890     # Step: check other node consistency
9891     self.lu.LogStep(2, steps_total, "Check peer consistency")
9892     self._CheckDisksConsistency(self.instance.primary_node, True, True)
9893
9894     # Step: create new storage
9895     self.lu.LogStep(3, steps_total, "Allocate new storage")
9896     for idx, dev in enumerate(self.instance.disks):
9897       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9898                       (self.new_node, idx))
9899       # we pass force_create=True to force LVM creation
9900       for new_lv in dev.children:
9901         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9902                         _GetInstanceInfoText(self.instance), False)
9903
9904     # Step 4: dbrd minors and drbd setups changes
9905     # after this, we must manually remove the drbd minors on both the
9906     # error and the success paths
9907     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9908     minors = self.cfg.AllocateDRBDMinor([self.new_node
9909                                          for dev in self.instance.disks],
9910                                         self.instance.name)
9911     logging.debug("Allocated minors %r", minors)
9912
9913     iv_names = {}
9914     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9915       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9916                       (self.new_node, idx))
9917       # create new devices on new_node; note that we create two IDs:
9918       # one without port, so the drbd will be activated without
9919       # networking information on the new node at this stage, and one
9920       # with network, for the latter activation in step 4
9921       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9922       if self.instance.primary_node == o_node1:
9923         p_minor = o_minor1
9924       else:
9925         assert self.instance.primary_node == o_node2, "Three-node instance?"
9926         p_minor = o_minor2
9927
9928       new_alone_id = (self.instance.primary_node, self.new_node, None,
9929                       p_minor, new_minor, o_secret)
9930       new_net_id = (self.instance.primary_node, self.new_node, o_port,
9931                     p_minor, new_minor, o_secret)
9932
9933       iv_names[idx] = (dev, dev.children, new_net_id)
9934       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9935                     new_net_id)
9936       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9937                               logical_id=new_alone_id,
9938                               children=dev.children,
9939                               size=dev.size)
9940       try:
9941         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9942                               _GetInstanceInfoText(self.instance), False)
9943       except errors.GenericError:
9944         self.cfg.ReleaseDRBDMinors(self.instance.name)
9945         raise
9946
9947     # We have new devices, shutdown the drbd on the old secondary
9948     for idx, dev in enumerate(self.instance.disks):
9949       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9950       self.cfg.SetDiskID(dev, self.target_node)
9951       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9952       if msg:
9953         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9954                            "node: %s" % (idx, msg),
9955                            hint=("Please cleanup this device manually as"
9956                                  " soon as possible"))
9957
9958     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9959     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
9960                                                self.instance.disks)[pnode]
9961
9962     msg = result.fail_msg
9963     if msg:
9964       # detaches didn't succeed (unlikely)
9965       self.cfg.ReleaseDRBDMinors(self.instance.name)
9966       raise errors.OpExecError("Can't detach the disks from the network on"
9967                                " old node: %s" % (msg,))
9968
9969     # if we managed to detach at least one, we update all the disks of
9970     # the instance to point to the new secondary
9971     self.lu.LogInfo("Updating instance configuration")
9972     for dev, _, new_logical_id in iv_names.itervalues():
9973       dev.logical_id = new_logical_id
9974       self.cfg.SetDiskID(dev, self.instance.primary_node)
9975
9976     self.cfg.Update(self.instance, feedback_fn)
9977
9978     # and now perform the drbd attach
9979     self.lu.LogInfo("Attaching primary drbds to new secondary"
9980                     " (standalone => connected)")
9981     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9982                                             self.new_node],
9983                                            self.node_secondary_ip,
9984                                            self.instance.disks,
9985                                            self.instance.name,
9986                                            False)
9987     for to_node, to_result in result.items():
9988       msg = to_result.fail_msg
9989       if msg:
9990         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9991                            to_node, msg,
9992                            hint=("please do a gnt-instance info to see the"
9993                                  " status of disks"))
9994     cstep = 5
9995     if self.early_release:
9996       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9997       cstep += 1
9998       self._RemoveOldStorage(self.target_node, iv_names)
9999       # WARNING: we release all node locks here, do not do other RPCs
10000       # than WaitForSync to the primary node
10001       _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10002                     names=[self.instance.primary_node,
10003                            self.target_node,
10004                            self.new_node])
10005
10006     # Wait for sync
10007     # This can fail as the old devices are degraded and _WaitForSync
10008     # does a combined result over all disks, so we don't check its return value
10009     self.lu.LogStep(cstep, steps_total, "Sync devices")
10010     cstep += 1
10011     _WaitForSync(self.lu, self.instance)
10012
10013     # Check all devices manually
10014     self._CheckDevices(self.instance.primary_node, iv_names)
10015
10016     # Step: remove old storage
10017     if not self.early_release:
10018       self.lu.LogStep(cstep, steps_total, "Removing old storage")
10019       self._RemoveOldStorage(self.target_node, iv_names)
10020
10021
10022 class LURepairNodeStorage(NoHooksLU):
10023   """Repairs the volume group on a node.
10024
10025   """
10026   REQ_BGL = False
10027
10028   def CheckArguments(self):
10029     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10030
10031     storage_type = self.op.storage_type
10032
10033     if (constants.SO_FIX_CONSISTENCY not in
10034         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10035       raise errors.OpPrereqError("Storage units of type '%s' can not be"
10036                                  " repaired" % storage_type,
10037                                  errors.ECODE_INVAL)
10038
10039   def ExpandNames(self):
10040     self.needed_locks = {
10041       locking.LEVEL_NODE: [self.op.node_name],
10042       }
10043
10044   def _CheckFaultyDisks(self, instance, node_name):
10045     """Ensure faulty disks abort the opcode or at least warn."""
10046     try:
10047       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10048                                   node_name, True):
10049         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10050                                    " node '%s'" % (instance.name, node_name),
10051                                    errors.ECODE_STATE)
10052     except errors.OpPrereqError, err:
10053       if self.op.ignore_consistency:
10054         self.proc.LogWarning(str(err.args[0]))
10055       else:
10056         raise
10057
10058   def CheckPrereq(self):
10059     """Check prerequisites.
10060
10061     """
10062     # Check whether any instance on this node has faulty disks
10063     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10064       if not inst.admin_up:
10065         continue
10066       check_nodes = set(inst.all_nodes)
10067       check_nodes.discard(self.op.node_name)
10068       for inst_node_name in check_nodes:
10069         self._CheckFaultyDisks(inst, inst_node_name)
10070
10071   def Exec(self, feedback_fn):
10072     feedback_fn("Repairing storage unit '%s' on %s ..." %
10073                 (self.op.name, self.op.node_name))
10074
10075     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10076     result = self.rpc.call_storage_execute(self.op.node_name,
10077                                            self.op.storage_type, st_args,
10078                                            self.op.name,
10079                                            constants.SO_FIX_CONSISTENCY)
10080     result.Raise("Failed to repair storage unit '%s' on %s" %
10081                  (self.op.name, self.op.node_name))
10082
10083
10084 class LUNodeEvacuate(NoHooksLU):
10085   """Evacuates instances off a list of nodes.
10086
10087   """
10088   REQ_BGL = False
10089
10090   def CheckArguments(self):
10091     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10092
10093   def ExpandNames(self):
10094     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10095
10096     if self.op.remote_node is not None:
10097       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10098       assert self.op.remote_node
10099
10100       if self.op.remote_node == self.op.node_name:
10101         raise errors.OpPrereqError("Can not use evacuated node as a new"
10102                                    " secondary node", errors.ECODE_INVAL)
10103
10104       if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10105         raise errors.OpPrereqError("Without the use of an iallocator only"
10106                                    " secondary instances can be evacuated",
10107                                    errors.ECODE_INVAL)
10108
10109     # Declare locks
10110     self.share_locks = _ShareAll()
10111     self.needed_locks = {
10112       locking.LEVEL_INSTANCE: [],
10113       locking.LEVEL_NODEGROUP: [],
10114       locking.LEVEL_NODE: [],
10115       }
10116
10117     if self.op.remote_node is None:
10118       # Iallocator will choose any node(s) in the same group
10119       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10120     else:
10121       group_nodes = frozenset([self.op.remote_node])
10122
10123     # Determine nodes to be locked
10124     self.lock_nodes = set([self.op.node_name]) | group_nodes
10125
10126   def _DetermineInstances(self):
10127     """Builds list of instances to operate on.
10128
10129     """
10130     assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10131
10132     if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10133       # Primary instances only
10134       inst_fn = _GetNodePrimaryInstances
10135       assert self.op.remote_node is None, \
10136         "Evacuating primary instances requires iallocator"
10137     elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10138       # Secondary instances only
10139       inst_fn = _GetNodeSecondaryInstances
10140     else:
10141       # All instances
10142       assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10143       inst_fn = _GetNodeInstances
10144
10145     return inst_fn(self.cfg, self.op.node_name)
10146
10147   def DeclareLocks(self, level):
10148     if level == locking.LEVEL_INSTANCE:
10149       # Lock instances optimistically, needs verification once node and group
10150       # locks have been acquired
10151       self.needed_locks[locking.LEVEL_INSTANCE] = \
10152         set(i.name for i in self._DetermineInstances())
10153
10154     elif level == locking.LEVEL_NODEGROUP:
10155       # Lock node groups optimistically, needs verification once nodes have
10156       # been acquired
10157       self.needed_locks[locking.LEVEL_NODEGROUP] = \
10158         self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10159
10160     elif level == locking.LEVEL_NODE:
10161       self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10162
10163   def CheckPrereq(self):
10164     # Verify locks
10165     owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10166     owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10167     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10168
10169     assert owned_nodes == self.lock_nodes
10170
10171     wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10172     if owned_groups != wanted_groups:
10173       raise errors.OpExecError("Node groups changed since locks were acquired,"
10174                                " current groups are '%s', used to be '%s'" %
10175                                (utils.CommaJoin(wanted_groups),
10176                                 utils.CommaJoin(owned_groups)))
10177
10178     # Determine affected instances
10179     self.instances = self._DetermineInstances()
10180     self.instance_names = [i.name for i in self.instances]
10181
10182     if set(self.instance_names) != owned_instances:
10183       raise errors.OpExecError("Instances on node '%s' changed since locks"
10184                                " were acquired, current instances are '%s',"
10185                                " used to be '%s'" %
10186                                (self.op.node_name,
10187                                 utils.CommaJoin(self.instance_names),
10188                                 utils.CommaJoin(owned_instances)))
10189
10190     if self.instance_names:
10191       self.LogInfo("Evacuating instances from node '%s': %s",
10192                    self.op.node_name,
10193                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
10194     else:
10195       self.LogInfo("No instances to evacuate from node '%s'",
10196                    self.op.node_name)
10197
10198     if self.op.remote_node is not None:
10199       for i in self.instances:
10200         if i.primary_node == self.op.remote_node:
10201           raise errors.OpPrereqError("Node %s is the primary node of"
10202                                      " instance %s, cannot use it as"
10203                                      " secondary" %
10204                                      (self.op.remote_node, i.name),
10205                                      errors.ECODE_INVAL)
10206
10207   def Exec(self, feedback_fn):
10208     assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10209
10210     if not self.instance_names:
10211       # No instances to evacuate
10212       jobs = []
10213
10214     elif self.op.iallocator is not None:
10215       # TODO: Implement relocation to other group
10216       ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10217                        evac_mode=self.op.mode,
10218                        instances=list(self.instance_names))
10219
10220       ial.Run(self.op.iallocator)
10221
10222       if not ial.success:
10223         raise errors.OpPrereqError("Can't compute node evacuation using"
10224                                    " iallocator '%s': %s" %
10225                                    (self.op.iallocator, ial.info),
10226                                    errors.ECODE_NORES)
10227
10228       jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10229
10230     elif self.op.remote_node is not None:
10231       assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10232       jobs = [
10233         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10234                                         remote_node=self.op.remote_node,
10235                                         disks=[],
10236                                         mode=constants.REPLACE_DISK_CHG,
10237                                         early_release=self.op.early_release)]
10238         for instance_name in self.instance_names
10239         ]
10240
10241     else:
10242       raise errors.ProgrammerError("No iallocator or remote node")
10243
10244     return ResultWithJobs(jobs)
10245
10246
10247 def _SetOpEarlyRelease(early_release, op):
10248   """Sets C{early_release} flag on opcodes if available.
10249
10250   """
10251   try:
10252     op.early_release = early_release
10253   except AttributeError:
10254     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10255
10256   return op
10257
10258
10259 def _NodeEvacDest(use_nodes, group, nodes):
10260   """Returns group or nodes depending on caller's choice.
10261
10262   """
10263   if use_nodes:
10264     return utils.CommaJoin(nodes)
10265   else:
10266     return group
10267
10268
10269 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10270   """Unpacks the result of change-group and node-evacuate iallocator requests.
10271
10272   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10273   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10274
10275   @type lu: L{LogicalUnit}
10276   @param lu: Logical unit instance
10277   @type alloc_result: tuple/list
10278   @param alloc_result: Result from iallocator
10279   @type early_release: bool
10280   @param early_release: Whether to release locks early if possible
10281   @type use_nodes: bool
10282   @param use_nodes: Whether to display node names instead of groups
10283
10284   """
10285   (moved, failed, jobs) = alloc_result
10286
10287   if failed:
10288     lu.LogWarning("Unable to evacuate instances %s",
10289                   utils.CommaJoin("%s (%s)" % (name, reason)
10290                                   for (name, reason) in failed))
10291
10292   if moved:
10293     lu.LogInfo("Instances to be moved: %s",
10294                utils.CommaJoin("%s (to %s)" %
10295                                (name, _NodeEvacDest(use_nodes, group, nodes))
10296                                for (name, group, nodes) in moved))
10297
10298   return [map(compat.partial(_SetOpEarlyRelease, early_release),
10299               map(opcodes.OpCode.LoadOpCode, ops))
10300           for ops in jobs]
10301
10302
10303 class LUInstanceGrowDisk(LogicalUnit):
10304   """Grow a disk of an instance.
10305
10306   """
10307   HPATH = "disk-grow"
10308   HTYPE = constants.HTYPE_INSTANCE
10309   REQ_BGL = False
10310
10311   def ExpandNames(self):
10312     self._ExpandAndLockInstance()
10313     self.needed_locks[locking.LEVEL_NODE] = []
10314     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10315
10316   def DeclareLocks(self, level):
10317     if level == locking.LEVEL_NODE:
10318       self._LockInstancesNodes()
10319
10320   def BuildHooksEnv(self):
10321     """Build hooks env.
10322
10323     This runs on the master, the primary and all the secondaries.
10324
10325     """
10326     env = {
10327       "DISK": self.op.disk,
10328       "AMOUNT": self.op.amount,
10329       }
10330     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10331     return env
10332
10333   def BuildHooksNodes(self):
10334     """Build hooks nodes.
10335
10336     """
10337     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10338     return (nl, nl)
10339
10340   def CheckPrereq(self):
10341     """Check prerequisites.
10342
10343     This checks that the instance is in the cluster.
10344
10345     """
10346     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10347     assert instance is not None, \
10348       "Cannot retrieve locked instance %s" % self.op.instance_name
10349     nodenames = list(instance.all_nodes)
10350     for node in nodenames:
10351       _CheckNodeOnline(self, node)
10352
10353     self.instance = instance
10354
10355     if instance.disk_template not in constants.DTS_GROWABLE:
10356       raise errors.OpPrereqError("Instance's disk layout does not support"
10357                                  " growing", errors.ECODE_INVAL)
10358
10359     self.disk = instance.FindDisk(self.op.disk)
10360
10361     if instance.disk_template not in (constants.DT_FILE,
10362                                       constants.DT_SHARED_FILE):
10363       # TODO: check the free disk space for file, when that feature will be
10364       # supported
10365       _CheckNodesFreeDiskPerVG(self, nodenames,
10366                                self.disk.ComputeGrowth(self.op.amount))
10367
10368   def Exec(self, feedback_fn):
10369     """Execute disk grow.
10370
10371     """
10372     instance = self.instance
10373     disk = self.disk
10374
10375     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10376     if not disks_ok:
10377       raise errors.OpExecError("Cannot activate block device to grow")
10378
10379     # First run all grow ops in dry-run mode
10380     for node in instance.all_nodes:
10381       self.cfg.SetDiskID(disk, node)
10382       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10383       result.Raise("Grow request failed to node %s" % node)
10384
10385     # We know that (as far as we can test) operations across different
10386     # nodes will succeed, time to run it for real
10387     for node in instance.all_nodes:
10388       self.cfg.SetDiskID(disk, node)
10389       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10390       result.Raise("Grow request failed to node %s" % node)
10391
10392       # TODO: Rewrite code to work properly
10393       # DRBD goes into sync mode for a short amount of time after executing the
10394       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10395       # calling "resize" in sync mode fails. Sleeping for a short amount of
10396       # time is a work-around.
10397       time.sleep(5)
10398
10399     disk.RecordGrow(self.op.amount)
10400     self.cfg.Update(instance, feedback_fn)
10401     if self.op.wait_for_sync:
10402       disk_abort = not _WaitForSync(self, instance, disks=[disk])
10403       if disk_abort:
10404         self.proc.LogWarning("Disk sync-ing has not returned a good"
10405                              " status; please check the instance")
10406       if not instance.admin_up:
10407         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10408     elif not instance.admin_up:
10409       self.proc.LogWarning("Not shutting down the disk even if the instance is"
10410                            " not supposed to be running because no wait for"
10411                            " sync mode was requested")
10412
10413
10414 class LUInstanceQueryData(NoHooksLU):
10415   """Query runtime instance data.
10416
10417   """
10418   REQ_BGL = False
10419
10420   def ExpandNames(self):
10421     self.needed_locks = {}
10422
10423     # Use locking if requested or when non-static information is wanted
10424     if not (self.op.static or self.op.use_locking):
10425       self.LogWarning("Non-static data requested, locks need to be acquired")
10426       self.op.use_locking = True
10427
10428     if self.op.instances or not self.op.use_locking:
10429       # Expand instance names right here
10430       self.wanted_names = _GetWantedInstances(self, self.op.instances)
10431     else:
10432       # Will use acquired locks
10433       self.wanted_names = None
10434
10435     if self.op.use_locking:
10436       self.share_locks = _ShareAll()
10437
10438       if self.wanted_names is None:
10439         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10440       else:
10441         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10442
10443       self.needed_locks[locking.LEVEL_NODE] = []
10444       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10445
10446   def DeclareLocks(self, level):
10447     if self.op.use_locking and level == locking.LEVEL_NODE:
10448       self._LockInstancesNodes()
10449
10450   def CheckPrereq(self):
10451     """Check prerequisites.
10452
10453     This only checks the optional instance list against the existing names.
10454
10455     """
10456     if self.wanted_names is None:
10457       assert self.op.use_locking, "Locking was not used"
10458       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10459
10460     self.wanted_instances = \
10461         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10462
10463   def _ComputeBlockdevStatus(self, node, instance_name, dev):
10464     """Returns the status of a block device
10465
10466     """
10467     if self.op.static or not node:
10468       return None
10469
10470     self.cfg.SetDiskID(dev, node)
10471
10472     result = self.rpc.call_blockdev_find(node, dev)
10473     if result.offline:
10474       return None
10475
10476     result.Raise("Can't compute disk status for %s" % instance_name)
10477
10478     status = result.payload
10479     if status is None:
10480       return None
10481
10482     return (status.dev_path, status.major, status.minor,
10483             status.sync_percent, status.estimated_time,
10484             status.is_degraded, status.ldisk_status)
10485
10486   def _ComputeDiskStatus(self, instance, snode, dev):
10487     """Compute block device status.
10488
10489     """
10490     if dev.dev_type in constants.LDS_DRBD:
10491       # we change the snode then (otherwise we use the one passed in)
10492       if dev.logical_id[0] == instance.primary_node:
10493         snode = dev.logical_id[1]
10494       else:
10495         snode = dev.logical_id[0]
10496
10497     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10498                                               instance.name, dev)
10499     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10500
10501     if dev.children:
10502       dev_children = map(compat.partial(self._ComputeDiskStatus,
10503                                         instance, snode),
10504                          dev.children)
10505     else:
10506       dev_children = []
10507
10508     return {
10509       "iv_name": dev.iv_name,
10510       "dev_type": dev.dev_type,
10511       "logical_id": dev.logical_id,
10512       "physical_id": dev.physical_id,
10513       "pstatus": dev_pstatus,
10514       "sstatus": dev_sstatus,
10515       "children": dev_children,
10516       "mode": dev.mode,
10517       "size": dev.size,
10518       }
10519
10520   def Exec(self, feedback_fn):
10521     """Gather and return data"""
10522     result = {}
10523
10524     cluster = self.cfg.GetClusterInfo()
10525
10526     pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10527                                           for i in self.wanted_instances)
10528     for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10529       if self.op.static or pnode.offline:
10530         remote_state = None
10531         if pnode.offline:
10532           self.LogWarning("Primary node %s is marked offline, returning static"
10533                           " information only for instance %s" %
10534                           (pnode.name, instance.name))
10535       else:
10536         remote_info = self.rpc.call_instance_info(instance.primary_node,
10537                                                   instance.name,
10538                                                   instance.hypervisor)
10539         remote_info.Raise("Error checking node %s" % instance.primary_node)
10540         remote_info = remote_info.payload
10541         if remote_info and "state" in remote_info:
10542           remote_state = "up"
10543         else:
10544           remote_state = "down"
10545
10546       if instance.admin_up:
10547         config_state = "up"
10548       else:
10549         config_state = "down"
10550
10551       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10552                   instance.disks)
10553
10554       result[instance.name] = {
10555         "name": instance.name,
10556         "config_state": config_state,
10557         "run_state": remote_state,
10558         "pnode": instance.primary_node,
10559         "snodes": instance.secondary_nodes,
10560         "os": instance.os,
10561         # this happens to be the same format used for hooks
10562         "nics": _NICListToTuple(self, instance.nics),
10563         "disk_template": instance.disk_template,
10564         "disks": disks,
10565         "hypervisor": instance.hypervisor,
10566         "network_port": instance.network_port,
10567         "hv_instance": instance.hvparams,
10568         "hv_actual": cluster.FillHV(instance, skip_globals=True),
10569         "be_instance": instance.beparams,
10570         "be_actual": cluster.FillBE(instance),
10571         "os_instance": instance.osparams,
10572         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10573         "serial_no": instance.serial_no,
10574         "mtime": instance.mtime,
10575         "ctime": instance.ctime,
10576         "uuid": instance.uuid,
10577         }
10578
10579     return result
10580
10581
10582 class LUInstanceSetParams(LogicalUnit):
10583   """Modifies an instances's parameters.
10584
10585   """
10586   HPATH = "instance-modify"
10587   HTYPE = constants.HTYPE_INSTANCE
10588   REQ_BGL = False
10589
10590   def CheckArguments(self):
10591     if not (self.op.nics or self.op.disks or self.op.disk_template or
10592             self.op.hvparams or self.op.beparams or self.op.os_name):
10593       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10594
10595     if self.op.hvparams:
10596       _CheckGlobalHvParams(self.op.hvparams)
10597
10598     # Disk validation
10599     disk_addremove = 0
10600     for disk_op, disk_dict in self.op.disks:
10601       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10602       if disk_op == constants.DDM_REMOVE:
10603         disk_addremove += 1
10604         continue
10605       elif disk_op == constants.DDM_ADD:
10606         disk_addremove += 1
10607       else:
10608         if not isinstance(disk_op, int):
10609           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10610         if not isinstance(disk_dict, dict):
10611           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10612           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10613
10614       if disk_op == constants.DDM_ADD:
10615         mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10616         if mode not in constants.DISK_ACCESS_SET:
10617           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10618                                      errors.ECODE_INVAL)
10619         size = disk_dict.get(constants.IDISK_SIZE, None)
10620         if size is None:
10621           raise errors.OpPrereqError("Required disk parameter size missing",
10622                                      errors.ECODE_INVAL)
10623         try:
10624           size = int(size)
10625         except (TypeError, ValueError), err:
10626           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10627                                      str(err), errors.ECODE_INVAL)
10628         disk_dict[constants.IDISK_SIZE] = size
10629       else:
10630         # modification of disk
10631         if constants.IDISK_SIZE in disk_dict:
10632           raise errors.OpPrereqError("Disk size change not possible, use"
10633                                      " grow-disk", errors.ECODE_INVAL)
10634
10635     if disk_addremove > 1:
10636       raise errors.OpPrereqError("Only one disk add or remove operation"
10637                                  " supported at a time", errors.ECODE_INVAL)
10638
10639     if self.op.disks and self.op.disk_template is not None:
10640       raise errors.OpPrereqError("Disk template conversion and other disk"
10641                                  " changes not supported at the same time",
10642                                  errors.ECODE_INVAL)
10643
10644     if (self.op.disk_template and
10645         self.op.disk_template in constants.DTS_INT_MIRROR and
10646         self.op.remote_node is None):
10647       raise errors.OpPrereqError("Changing the disk template to a mirrored"
10648                                  " one requires specifying a secondary node",
10649                                  errors.ECODE_INVAL)
10650
10651     # NIC validation
10652     nic_addremove = 0
10653     for nic_op, nic_dict in self.op.nics:
10654       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10655       if nic_op == constants.DDM_REMOVE:
10656         nic_addremove += 1
10657         continue
10658       elif nic_op == constants.DDM_ADD:
10659         nic_addremove += 1
10660       else:
10661         if not isinstance(nic_op, int):
10662           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10663         if not isinstance(nic_dict, dict):
10664           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10665           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10666
10667       # nic_dict should be a dict
10668       nic_ip = nic_dict.get(constants.INIC_IP, None)
10669       if nic_ip is not None:
10670         if nic_ip.lower() == constants.VALUE_NONE:
10671           nic_dict[constants.INIC_IP] = None
10672         else:
10673           if not netutils.IPAddress.IsValid(nic_ip):
10674             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10675                                        errors.ECODE_INVAL)
10676
10677       nic_bridge = nic_dict.get("bridge", None)
10678       nic_link = nic_dict.get(constants.INIC_LINK, None)
10679       if nic_bridge and nic_link:
10680         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10681                                    " at the same time", errors.ECODE_INVAL)
10682       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10683         nic_dict["bridge"] = None
10684       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10685         nic_dict[constants.INIC_LINK] = None
10686
10687       if nic_op == constants.DDM_ADD:
10688         nic_mac = nic_dict.get(constants.INIC_MAC, None)
10689         if nic_mac is None:
10690           nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10691
10692       if constants.INIC_MAC in nic_dict:
10693         nic_mac = nic_dict[constants.INIC_MAC]
10694         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10695           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10696
10697         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10698           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10699                                      " modifying an existing nic",
10700                                      errors.ECODE_INVAL)
10701
10702     if nic_addremove > 1:
10703       raise errors.OpPrereqError("Only one NIC add or remove operation"
10704                                  " supported at a time", errors.ECODE_INVAL)
10705
10706   def ExpandNames(self):
10707     self._ExpandAndLockInstance()
10708     self.needed_locks[locking.LEVEL_NODE] = []
10709     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10710
10711   def DeclareLocks(self, level):
10712     if level == locking.LEVEL_NODE:
10713       self._LockInstancesNodes()
10714       if self.op.disk_template and self.op.remote_node:
10715         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10716         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10717
10718   def BuildHooksEnv(self):
10719     """Build hooks env.
10720
10721     This runs on the master, primary and secondaries.
10722
10723     """
10724     args = dict()
10725     if constants.BE_MEMORY in self.be_new:
10726       args["memory"] = self.be_new[constants.BE_MEMORY]
10727     if constants.BE_VCPUS in self.be_new:
10728       args["vcpus"] = self.be_new[constants.BE_VCPUS]
10729     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10730     # information at all.
10731     if self.op.nics:
10732       args["nics"] = []
10733       nic_override = dict(self.op.nics)
10734       for idx, nic in enumerate(self.instance.nics):
10735         if idx in nic_override:
10736           this_nic_override = nic_override[idx]
10737         else:
10738           this_nic_override = {}
10739         if constants.INIC_IP in this_nic_override:
10740           ip = this_nic_override[constants.INIC_IP]
10741         else:
10742           ip = nic.ip
10743         if constants.INIC_MAC in this_nic_override:
10744           mac = this_nic_override[constants.INIC_MAC]
10745         else:
10746           mac = nic.mac
10747         if idx in self.nic_pnew:
10748           nicparams = self.nic_pnew[idx]
10749         else:
10750           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10751         mode = nicparams[constants.NIC_MODE]
10752         link = nicparams[constants.NIC_LINK]
10753         args["nics"].append((ip, mac, mode, link))
10754       if constants.DDM_ADD in nic_override:
10755         ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10756         mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10757         nicparams = self.nic_pnew[constants.DDM_ADD]
10758         mode = nicparams[constants.NIC_MODE]
10759         link = nicparams[constants.NIC_LINK]
10760         args["nics"].append((ip, mac, mode, link))
10761       elif constants.DDM_REMOVE in nic_override:
10762         del args["nics"][-1]
10763
10764     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10765     if self.op.disk_template:
10766       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10767
10768     return env
10769
10770   def BuildHooksNodes(self):
10771     """Build hooks nodes.
10772
10773     """
10774     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10775     return (nl, nl)
10776
10777   def CheckPrereq(self):
10778     """Check prerequisites.
10779
10780     This only checks the instance list against the existing names.
10781
10782     """
10783     # checking the new params on the primary/secondary nodes
10784
10785     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10786     cluster = self.cluster = self.cfg.GetClusterInfo()
10787     assert self.instance is not None, \
10788       "Cannot retrieve locked instance %s" % self.op.instance_name
10789     pnode = instance.primary_node
10790     nodelist = list(instance.all_nodes)
10791
10792     # OS change
10793     if self.op.os_name and not self.op.force:
10794       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10795                       self.op.force_variant)
10796       instance_os = self.op.os_name
10797     else:
10798       instance_os = instance.os
10799
10800     if self.op.disk_template:
10801       if instance.disk_template == self.op.disk_template:
10802         raise errors.OpPrereqError("Instance already has disk template %s" %
10803                                    instance.disk_template, errors.ECODE_INVAL)
10804
10805       if (instance.disk_template,
10806           self.op.disk_template) not in self._DISK_CONVERSIONS:
10807         raise errors.OpPrereqError("Unsupported disk template conversion from"
10808                                    " %s to %s" % (instance.disk_template,
10809                                                   self.op.disk_template),
10810                                    errors.ECODE_INVAL)
10811       _CheckInstanceDown(self, instance, "cannot change disk template")
10812       if self.op.disk_template in constants.DTS_INT_MIRROR:
10813         if self.op.remote_node == pnode:
10814           raise errors.OpPrereqError("Given new secondary node %s is the same"
10815                                      " as the primary node of the instance" %
10816                                      self.op.remote_node, errors.ECODE_STATE)
10817         _CheckNodeOnline(self, self.op.remote_node)
10818         _CheckNodeNotDrained(self, self.op.remote_node)
10819         # FIXME: here we assume that the old instance type is DT_PLAIN
10820         assert instance.disk_template == constants.DT_PLAIN
10821         disks = [{constants.IDISK_SIZE: d.size,
10822                   constants.IDISK_VG: d.logical_id[0]}
10823                  for d in instance.disks]
10824         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10825         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10826
10827     # hvparams processing
10828     if self.op.hvparams:
10829       hv_type = instance.hypervisor
10830       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10831       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10832       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10833
10834       # local check
10835       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10836       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10837       self.hv_new = hv_new # the new actual values
10838       self.hv_inst = i_hvdict # the new dict (without defaults)
10839     else:
10840       self.hv_new = self.hv_inst = {}
10841
10842     # beparams processing
10843     if self.op.beparams:
10844       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10845                                    use_none=True)
10846       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10847       be_new = cluster.SimpleFillBE(i_bedict)
10848       self.be_new = be_new # the new actual values
10849       self.be_inst = i_bedict # the new dict (without defaults)
10850     else:
10851       self.be_new = self.be_inst = {}
10852     be_old = cluster.FillBE(instance)
10853
10854     # osparams processing
10855     if self.op.osparams:
10856       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10857       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10858       self.os_inst = i_osdict # the new dict (without defaults)
10859     else:
10860       self.os_inst = {}
10861
10862     self.warn = []
10863
10864     if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10865         be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10866       mem_check_list = [pnode]
10867       if be_new[constants.BE_AUTO_BALANCE]:
10868         # either we changed auto_balance to yes or it was from before
10869         mem_check_list.extend(instance.secondary_nodes)
10870       instance_info = self.rpc.call_instance_info(pnode, instance.name,
10871                                                   instance.hypervisor)
10872       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10873                                          instance.hypervisor)
10874       pninfo = nodeinfo[pnode]
10875       msg = pninfo.fail_msg
10876       if msg:
10877         # Assume the primary node is unreachable and go ahead
10878         self.warn.append("Can't get info from primary node %s: %s" %
10879                          (pnode, msg))
10880       elif not isinstance(pninfo.payload.get("memory_free", None), int):
10881         self.warn.append("Node data from primary node %s doesn't contain"
10882                          " free memory information" % pnode)
10883       elif instance_info.fail_msg:
10884         self.warn.append("Can't get instance runtime information: %s" %
10885                         instance_info.fail_msg)
10886       else:
10887         if instance_info.payload:
10888           current_mem = int(instance_info.payload["memory"])
10889         else:
10890           # Assume instance not running
10891           # (there is a slight race condition here, but it's not very probable,
10892           # and we have no other way to check)
10893           current_mem = 0
10894         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10895                     pninfo.payload["memory_free"])
10896         if miss_mem > 0:
10897           raise errors.OpPrereqError("This change will prevent the instance"
10898                                      " from starting, due to %d MB of memory"
10899                                      " missing on its primary node" % miss_mem,
10900                                      errors.ECODE_NORES)
10901
10902       if be_new[constants.BE_AUTO_BALANCE]:
10903         for node, nres in nodeinfo.items():
10904           if node not in instance.secondary_nodes:
10905             continue
10906           nres.Raise("Can't get info from secondary node %s" % node,
10907                      prereq=True, ecode=errors.ECODE_STATE)
10908           if not isinstance(nres.payload.get("memory_free", None), int):
10909             raise errors.OpPrereqError("Secondary node %s didn't return free"
10910                                        " memory information" % node,
10911                                        errors.ECODE_STATE)
10912           elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10913             raise errors.OpPrereqError("This change will prevent the instance"
10914                                        " from failover to its secondary node"
10915                                        " %s, due to not enough memory" % node,
10916                                        errors.ECODE_STATE)
10917
10918     # NIC processing
10919     self.nic_pnew = {}
10920     self.nic_pinst = {}
10921     for nic_op, nic_dict in self.op.nics:
10922       if nic_op == constants.DDM_REMOVE:
10923         if not instance.nics:
10924           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10925                                      errors.ECODE_INVAL)
10926         continue
10927       if nic_op != constants.DDM_ADD:
10928         # an existing nic
10929         if not instance.nics:
10930           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10931                                      " no NICs" % nic_op,
10932                                      errors.ECODE_INVAL)
10933         if nic_op < 0 or nic_op >= len(instance.nics):
10934           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10935                                      " are 0 to %d" %
10936                                      (nic_op, len(instance.nics) - 1),
10937                                      errors.ECODE_INVAL)
10938         old_nic_params = instance.nics[nic_op].nicparams
10939         old_nic_ip = instance.nics[nic_op].ip
10940       else:
10941         old_nic_params = {}
10942         old_nic_ip = None
10943
10944       update_params_dict = dict([(key, nic_dict[key])
10945                                  for key in constants.NICS_PARAMETERS
10946                                  if key in nic_dict])
10947
10948       if "bridge" in nic_dict:
10949         update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10950
10951       new_nic_params = _GetUpdatedParams(old_nic_params,
10952                                          update_params_dict)
10953       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10954       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10955       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10956       self.nic_pinst[nic_op] = new_nic_params
10957       self.nic_pnew[nic_op] = new_filled_nic_params
10958       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10959
10960       if new_nic_mode == constants.NIC_MODE_BRIDGED:
10961         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10962         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10963         if msg:
10964           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10965           if self.op.force:
10966             self.warn.append(msg)
10967           else:
10968             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10969       if new_nic_mode == constants.NIC_MODE_ROUTED:
10970         if constants.INIC_IP in nic_dict:
10971           nic_ip = nic_dict[constants.INIC_IP]
10972         else:
10973           nic_ip = old_nic_ip
10974         if nic_ip is None:
10975           raise errors.OpPrereqError("Cannot set the nic ip to None"
10976                                      " on a routed nic", errors.ECODE_INVAL)
10977       if constants.INIC_MAC in nic_dict:
10978         nic_mac = nic_dict[constants.INIC_MAC]
10979         if nic_mac is None:
10980           raise errors.OpPrereqError("Cannot set the nic mac to None",
10981                                      errors.ECODE_INVAL)
10982         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10983           # otherwise generate the mac
10984           nic_dict[constants.INIC_MAC] = \
10985             self.cfg.GenerateMAC(self.proc.GetECId())
10986         else:
10987           # or validate/reserve the current one
10988           try:
10989             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10990           except errors.ReservationError:
10991             raise errors.OpPrereqError("MAC address %s already in use"
10992                                        " in cluster" % nic_mac,
10993                                        errors.ECODE_NOTUNIQUE)
10994
10995     # DISK processing
10996     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10997       raise errors.OpPrereqError("Disk operations not supported for"
10998                                  " diskless instances",
10999                                  errors.ECODE_INVAL)
11000     for disk_op, _ in self.op.disks:
11001       if disk_op == constants.DDM_REMOVE:
11002         if len(instance.disks) == 1:
11003           raise errors.OpPrereqError("Cannot remove the last disk of"
11004                                      " an instance", errors.ECODE_INVAL)
11005         _CheckInstanceDown(self, instance, "cannot remove disks")
11006
11007       if (disk_op == constants.DDM_ADD and
11008           len(instance.disks) >= constants.MAX_DISKS):
11009         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11010                                    " add more" % constants.MAX_DISKS,
11011                                    errors.ECODE_STATE)
11012       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11013         # an existing disk
11014         if disk_op < 0 or disk_op >= len(instance.disks):
11015           raise errors.OpPrereqError("Invalid disk index %s, valid values"
11016                                      " are 0 to %d" %
11017                                      (disk_op, len(instance.disks)),
11018                                      errors.ECODE_INVAL)
11019
11020     return
11021
11022   def _ConvertPlainToDrbd(self, feedback_fn):
11023     """Converts an instance from plain to drbd.
11024
11025     """
11026     feedback_fn("Converting template to drbd")
11027     instance = self.instance
11028     pnode = instance.primary_node
11029     snode = self.op.remote_node
11030
11031     # create a fake disk info for _GenerateDiskTemplate
11032     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11033                   constants.IDISK_VG: d.logical_id[0]}
11034                  for d in instance.disks]
11035     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11036                                       instance.name, pnode, [snode],
11037                                       disk_info, None, None, 0, feedback_fn)
11038     info = _GetInstanceInfoText(instance)
11039     feedback_fn("Creating aditional volumes...")
11040     # first, create the missing data and meta devices
11041     for disk in new_disks:
11042       # unfortunately this is... not too nice
11043       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11044                             info, True)
11045       for child in disk.children:
11046         _CreateSingleBlockDev(self, snode, instance, child, info, True)
11047     # at this stage, all new LVs have been created, we can rename the
11048     # old ones
11049     feedback_fn("Renaming original volumes...")
11050     rename_list = [(o, n.children[0].logical_id)
11051                    for (o, n) in zip(instance.disks, new_disks)]
11052     result = self.rpc.call_blockdev_rename(pnode, rename_list)
11053     result.Raise("Failed to rename original LVs")
11054
11055     feedback_fn("Initializing DRBD devices...")
11056     # all child devices are in place, we can now create the DRBD devices
11057     for disk in new_disks:
11058       for node in [pnode, snode]:
11059         f_create = node == pnode
11060         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11061
11062     # at this point, the instance has been modified
11063     instance.disk_template = constants.DT_DRBD8
11064     instance.disks = new_disks
11065     self.cfg.Update(instance, feedback_fn)
11066
11067     # disks are created, waiting for sync
11068     disk_abort = not _WaitForSync(self, instance,
11069                                   oneshot=not self.op.wait_for_sync)
11070     if disk_abort:
11071       raise errors.OpExecError("There are some degraded disks for"
11072                                " this instance, please cleanup manually")
11073
11074   def _ConvertDrbdToPlain(self, feedback_fn):
11075     """Converts an instance from drbd to plain.
11076
11077     """
11078     instance = self.instance
11079     assert len(instance.secondary_nodes) == 1
11080     pnode = instance.primary_node
11081     snode = instance.secondary_nodes[0]
11082     feedback_fn("Converting template to plain")
11083
11084     old_disks = instance.disks
11085     new_disks = [d.children[0] for d in old_disks]
11086
11087     # copy over size and mode
11088     for parent, child in zip(old_disks, new_disks):
11089       child.size = parent.size
11090       child.mode = parent.mode
11091
11092     # update instance structure
11093     instance.disks = new_disks
11094     instance.disk_template = constants.DT_PLAIN
11095     self.cfg.Update(instance, feedback_fn)
11096
11097     feedback_fn("Removing volumes on the secondary node...")
11098     for disk in old_disks:
11099       self.cfg.SetDiskID(disk, snode)
11100       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11101       if msg:
11102         self.LogWarning("Could not remove block device %s on node %s,"
11103                         " continuing anyway: %s", disk.iv_name, snode, msg)
11104
11105     feedback_fn("Removing unneeded volumes on the primary node...")
11106     for idx, disk in enumerate(old_disks):
11107       meta = disk.children[1]
11108       self.cfg.SetDiskID(meta, pnode)
11109       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11110       if msg:
11111         self.LogWarning("Could not remove metadata for disk %d on node %s,"
11112                         " continuing anyway: %s", idx, pnode, msg)
11113
11114   def Exec(self, feedback_fn):
11115     """Modifies an instance.
11116
11117     All parameters take effect only at the next restart of the instance.
11118
11119     """
11120     # Process here the warnings from CheckPrereq, as we don't have a
11121     # feedback_fn there.
11122     for warn in self.warn:
11123       feedback_fn("WARNING: %s" % warn)
11124
11125     result = []
11126     instance = self.instance
11127     # disk changes
11128     for disk_op, disk_dict in self.op.disks:
11129       if disk_op == constants.DDM_REMOVE:
11130         # remove the last disk
11131         device = instance.disks.pop()
11132         device_idx = len(instance.disks)
11133         for node, disk in device.ComputeNodeTree(instance.primary_node):
11134           self.cfg.SetDiskID(disk, node)
11135           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11136           if msg:
11137             self.LogWarning("Could not remove disk/%d on node %s: %s,"
11138                             " continuing anyway", device_idx, node, msg)
11139         result.append(("disk/%d" % device_idx, "remove"))
11140       elif disk_op == constants.DDM_ADD:
11141         # add a new disk
11142         if instance.disk_template in (constants.DT_FILE,
11143                                         constants.DT_SHARED_FILE):
11144           file_driver, file_path = instance.disks[0].logical_id
11145           file_path = os.path.dirname(file_path)
11146         else:
11147           file_driver = file_path = None
11148         disk_idx_base = len(instance.disks)
11149         new_disk = _GenerateDiskTemplate(self,
11150                                          instance.disk_template,
11151                                          instance.name, instance.primary_node,
11152                                          instance.secondary_nodes,
11153                                          [disk_dict],
11154                                          file_path,
11155                                          file_driver,
11156                                          disk_idx_base, feedback_fn)[0]
11157         instance.disks.append(new_disk)
11158         info = _GetInstanceInfoText(instance)
11159
11160         logging.info("Creating volume %s for instance %s",
11161                      new_disk.iv_name, instance.name)
11162         # Note: this needs to be kept in sync with _CreateDisks
11163         #HARDCODE
11164         for node in instance.all_nodes:
11165           f_create = node == instance.primary_node
11166           try:
11167             _CreateBlockDev(self, node, instance, new_disk,
11168                             f_create, info, f_create)
11169           except errors.OpExecError, err:
11170             self.LogWarning("Failed to create volume %s (%s) on"
11171                             " node %s: %s",
11172                             new_disk.iv_name, new_disk, node, err)
11173         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11174                        (new_disk.size, new_disk.mode)))
11175       else:
11176         # change a given disk
11177         instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11178         result.append(("disk.mode/%d" % disk_op,
11179                        disk_dict[constants.IDISK_MODE]))
11180
11181     if self.op.disk_template:
11182       r_shut = _ShutdownInstanceDisks(self, instance)
11183       if not r_shut:
11184         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11185                                  " proceed with disk template conversion")
11186       mode = (instance.disk_template, self.op.disk_template)
11187       try:
11188         self._DISK_CONVERSIONS[mode](self, feedback_fn)
11189       except:
11190         self.cfg.ReleaseDRBDMinors(instance.name)
11191         raise
11192       result.append(("disk_template", self.op.disk_template))
11193
11194     # NIC changes
11195     for nic_op, nic_dict in self.op.nics:
11196       if nic_op == constants.DDM_REMOVE:
11197         # remove the last nic
11198         del instance.nics[-1]
11199         result.append(("nic.%d" % len(instance.nics), "remove"))
11200       elif nic_op == constants.DDM_ADD:
11201         # mac and bridge should be set, by now
11202         mac = nic_dict[constants.INIC_MAC]
11203         ip = nic_dict.get(constants.INIC_IP, None)
11204         nicparams = self.nic_pinst[constants.DDM_ADD]
11205         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11206         instance.nics.append(new_nic)
11207         result.append(("nic.%d" % (len(instance.nics) - 1),
11208                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
11209                        (new_nic.mac, new_nic.ip,
11210                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11211                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11212                        )))
11213       else:
11214         for key in (constants.INIC_MAC, constants.INIC_IP):
11215           if key in nic_dict:
11216             setattr(instance.nics[nic_op], key, nic_dict[key])
11217         if nic_op in self.nic_pinst:
11218           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11219         for key, val in nic_dict.iteritems():
11220           result.append(("nic.%s/%d" % (key, nic_op), val))
11221
11222     # hvparams changes
11223     if self.op.hvparams:
11224       instance.hvparams = self.hv_inst
11225       for key, val in self.op.hvparams.iteritems():
11226         result.append(("hv/%s" % key, val))
11227
11228     # beparams changes
11229     if self.op.beparams:
11230       instance.beparams = self.be_inst
11231       for key, val in self.op.beparams.iteritems():
11232         result.append(("be/%s" % key, val))
11233
11234     # OS change
11235     if self.op.os_name:
11236       instance.os = self.op.os_name
11237
11238     # osparams changes
11239     if self.op.osparams:
11240       instance.osparams = self.os_inst
11241       for key, val in self.op.osparams.iteritems():
11242         result.append(("os/%s" % key, val))
11243
11244     self.cfg.Update(instance, feedback_fn)
11245
11246     return result
11247
11248   _DISK_CONVERSIONS = {
11249     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11250     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11251     }
11252
11253
11254 class LUInstanceChangeGroup(LogicalUnit):
11255   HPATH = "instance-change-group"
11256   HTYPE = constants.HTYPE_INSTANCE
11257   REQ_BGL = False
11258
11259   def ExpandNames(self):
11260     self.share_locks = _ShareAll()
11261     self.needed_locks = {
11262       locking.LEVEL_NODEGROUP: [],
11263       locking.LEVEL_NODE: [],
11264       }
11265
11266     self._ExpandAndLockInstance()
11267
11268     if self.op.target_groups:
11269       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11270                                   self.op.target_groups)
11271     else:
11272       self.req_target_uuids = None
11273
11274     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11275
11276   def DeclareLocks(self, level):
11277     if level == locking.LEVEL_NODEGROUP:
11278       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11279
11280       if self.req_target_uuids:
11281         lock_groups = set(self.req_target_uuids)
11282
11283         # Lock all groups used by instance optimistically; this requires going
11284         # via the node before it's locked, requiring verification later on
11285         instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11286         lock_groups.update(instance_groups)
11287       else:
11288         # No target groups, need to lock all of them
11289         lock_groups = locking.ALL_SET
11290
11291       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11292
11293     elif level == locking.LEVEL_NODE:
11294       if self.req_target_uuids:
11295         # Lock all nodes used by instances
11296         self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11297         self._LockInstancesNodes()
11298
11299         # Lock all nodes in all potential target groups
11300         lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11301                        self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11302         member_nodes = [node_name
11303                         for group in lock_groups
11304                         for node_name in self.cfg.GetNodeGroup(group).members]
11305         self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11306       else:
11307         # Lock all nodes as all groups are potential targets
11308         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11309
11310   def CheckPrereq(self):
11311     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11312     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11313     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11314
11315     assert (self.req_target_uuids is None or
11316             owned_groups.issuperset(self.req_target_uuids))
11317     assert owned_instances == set([self.op.instance_name])
11318
11319     # Get instance information
11320     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11321
11322     # Check if node groups for locked instance are still correct
11323     assert owned_nodes.issuperset(self.instance.all_nodes), \
11324       ("Instance %s's nodes changed while we kept the lock" %
11325        self.op.instance_name)
11326
11327     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11328                                            owned_groups)
11329
11330     if self.req_target_uuids:
11331       # User requested specific target groups
11332       self.target_uuids = self.req_target_uuids
11333     else:
11334       # All groups except those used by the instance are potential targets
11335       self.target_uuids = owned_groups - inst_groups
11336
11337     conflicting_groups = self.target_uuids & inst_groups
11338     if conflicting_groups:
11339       raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11340                                  " used by the instance '%s'" %
11341                                  (utils.CommaJoin(conflicting_groups),
11342                                   self.op.instance_name),
11343                                  errors.ECODE_INVAL)
11344
11345     if not self.target_uuids:
11346       raise errors.OpPrereqError("There are no possible target groups",
11347                                  errors.ECODE_INVAL)
11348
11349   def BuildHooksEnv(self):
11350     """Build hooks env.
11351
11352     """
11353     assert self.target_uuids
11354
11355     env = {
11356       "TARGET_GROUPS": " ".join(self.target_uuids),
11357       }
11358
11359     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11360
11361     return env
11362
11363   def BuildHooksNodes(self):
11364     """Build hooks nodes.
11365
11366     """
11367     mn = self.cfg.GetMasterNode()
11368     return ([mn], [mn])
11369
11370   def Exec(self, feedback_fn):
11371     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11372
11373     assert instances == [self.op.instance_name], "Instance not locked"
11374
11375     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11376                      instances=instances, target_groups=list(self.target_uuids))
11377
11378     ial.Run(self.op.iallocator)
11379
11380     if not ial.success:
11381       raise errors.OpPrereqError("Can't compute solution for changing group of"
11382                                  " instance '%s' using iallocator '%s': %s" %
11383                                  (self.op.instance_name, self.op.iallocator,
11384                                   ial.info),
11385                                  errors.ECODE_NORES)
11386
11387     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11388
11389     self.LogInfo("Iallocator returned %s job(s) for changing group of"
11390                  " instance '%s'", len(jobs), self.op.instance_name)
11391
11392     return ResultWithJobs(jobs)
11393
11394
11395 class LUBackupQuery(NoHooksLU):
11396   """Query the exports list
11397
11398   """
11399   REQ_BGL = False
11400
11401   def ExpandNames(self):
11402     self.needed_locks = {}
11403     self.share_locks[locking.LEVEL_NODE] = 1
11404     if not self.op.nodes:
11405       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11406     else:
11407       self.needed_locks[locking.LEVEL_NODE] = \
11408         _GetWantedNodes(self, self.op.nodes)
11409
11410   def Exec(self, feedback_fn):
11411     """Compute the list of all the exported system images.
11412
11413     @rtype: dict
11414     @return: a dictionary with the structure node->(export-list)
11415         where export-list is a list of the instances exported on
11416         that node.
11417
11418     """
11419     self.nodes = self.owned_locks(locking.LEVEL_NODE)
11420     rpcresult = self.rpc.call_export_list(self.nodes)
11421     result = {}
11422     for node in rpcresult:
11423       if rpcresult[node].fail_msg:
11424         result[node] = False
11425       else:
11426         result[node] = rpcresult[node].payload
11427
11428     return result
11429
11430
11431 class LUBackupPrepare(NoHooksLU):
11432   """Prepares an instance for an export and returns useful information.
11433
11434   """
11435   REQ_BGL = False
11436
11437   def ExpandNames(self):
11438     self._ExpandAndLockInstance()
11439
11440   def CheckPrereq(self):
11441     """Check prerequisites.
11442
11443     """
11444     instance_name = self.op.instance_name
11445
11446     self.instance = self.cfg.GetInstanceInfo(instance_name)
11447     assert self.instance is not None, \
11448           "Cannot retrieve locked instance %s" % self.op.instance_name
11449     _CheckNodeOnline(self, self.instance.primary_node)
11450
11451     self._cds = _GetClusterDomainSecret()
11452
11453   def Exec(self, feedback_fn):
11454     """Prepares an instance for an export.
11455
11456     """
11457     instance = self.instance
11458
11459     if self.op.mode == constants.EXPORT_MODE_REMOTE:
11460       salt = utils.GenerateSecret(8)
11461
11462       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11463       result = self.rpc.call_x509_cert_create(instance.primary_node,
11464                                               constants.RIE_CERT_VALIDITY)
11465       result.Raise("Can't create X509 key and certificate on %s" % result.node)
11466
11467       (name, cert_pem) = result.payload
11468
11469       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11470                                              cert_pem)
11471
11472       return {
11473         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11474         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11475                           salt),
11476         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11477         }
11478
11479     return None
11480
11481
11482 class LUBackupExport(LogicalUnit):
11483   """Export an instance to an image in the cluster.
11484
11485   """
11486   HPATH = "instance-export"
11487   HTYPE = constants.HTYPE_INSTANCE
11488   REQ_BGL = False
11489
11490   def CheckArguments(self):
11491     """Check the arguments.
11492
11493     """
11494     self.x509_key_name = self.op.x509_key_name
11495     self.dest_x509_ca_pem = self.op.destination_x509_ca
11496
11497     if self.op.mode == constants.EXPORT_MODE_REMOTE:
11498       if not self.x509_key_name:
11499         raise errors.OpPrereqError("Missing X509 key name for encryption",
11500                                    errors.ECODE_INVAL)
11501
11502       if not self.dest_x509_ca_pem:
11503         raise errors.OpPrereqError("Missing destination X509 CA",
11504                                    errors.ECODE_INVAL)
11505
11506   def ExpandNames(self):
11507     self._ExpandAndLockInstance()
11508
11509     # Lock all nodes for local exports
11510     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11511       # FIXME: lock only instance primary and destination node
11512       #
11513       # Sad but true, for now we have do lock all nodes, as we don't know where
11514       # the previous export might be, and in this LU we search for it and
11515       # remove it from its current node. In the future we could fix this by:
11516       #  - making a tasklet to search (share-lock all), then create the
11517       #    new one, then one to remove, after
11518       #  - removing the removal operation altogether
11519       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11520
11521   def DeclareLocks(self, level):
11522     """Last minute lock declaration."""
11523     # All nodes are locked anyway, so nothing to do here.
11524
11525   def BuildHooksEnv(self):
11526     """Build hooks env.
11527
11528     This will run on the master, primary node and target node.
11529
11530     """
11531     env = {
11532       "EXPORT_MODE": self.op.mode,
11533       "EXPORT_NODE": self.op.target_node,
11534       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11535       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11536       # TODO: Generic function for boolean env variables
11537       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11538       }
11539
11540     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11541
11542     return env
11543
11544   def BuildHooksNodes(self):
11545     """Build hooks nodes.
11546
11547     """
11548     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11549
11550     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11551       nl.append(self.op.target_node)
11552
11553     return (nl, nl)
11554
11555   def CheckPrereq(self):
11556     """Check prerequisites.
11557
11558     This checks that the instance and node names are valid.
11559
11560     """
11561     instance_name = self.op.instance_name
11562
11563     self.instance = self.cfg.GetInstanceInfo(instance_name)
11564     assert self.instance is not None, \
11565           "Cannot retrieve locked instance %s" % self.op.instance_name
11566     _CheckNodeOnline(self, self.instance.primary_node)
11567
11568     if (self.op.remove_instance and self.instance.admin_up and
11569         not self.op.shutdown):
11570       raise errors.OpPrereqError("Can not remove instance without shutting it"
11571                                  " down before")
11572
11573     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11574       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11575       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11576       assert self.dst_node is not None
11577
11578       _CheckNodeOnline(self, self.dst_node.name)
11579       _CheckNodeNotDrained(self, self.dst_node.name)
11580
11581       self._cds = None
11582       self.dest_disk_info = None
11583       self.dest_x509_ca = None
11584
11585     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11586       self.dst_node = None
11587
11588       if len(self.op.target_node) != len(self.instance.disks):
11589         raise errors.OpPrereqError(("Received destination information for %s"
11590                                     " disks, but instance %s has %s disks") %
11591                                    (len(self.op.target_node), instance_name,
11592                                     len(self.instance.disks)),
11593                                    errors.ECODE_INVAL)
11594
11595       cds = _GetClusterDomainSecret()
11596
11597       # Check X509 key name
11598       try:
11599         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11600       except (TypeError, ValueError), err:
11601         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11602
11603       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11604         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11605                                    errors.ECODE_INVAL)
11606
11607       # Load and verify CA
11608       try:
11609         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11610       except OpenSSL.crypto.Error, err:
11611         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11612                                    (err, ), errors.ECODE_INVAL)
11613
11614       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11615       if errcode is not None:
11616         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11617                                    (msg, ), errors.ECODE_INVAL)
11618
11619       self.dest_x509_ca = cert
11620
11621       # Verify target information
11622       disk_info = []
11623       for idx, disk_data in enumerate(self.op.target_node):
11624         try:
11625           (host, port, magic) = \
11626             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11627         except errors.GenericError, err:
11628           raise errors.OpPrereqError("Target info for disk %s: %s" %
11629                                      (idx, err), errors.ECODE_INVAL)
11630
11631         disk_info.append((host, port, magic))
11632
11633       assert len(disk_info) == len(self.op.target_node)
11634       self.dest_disk_info = disk_info
11635
11636     else:
11637       raise errors.ProgrammerError("Unhandled export mode %r" %
11638                                    self.op.mode)
11639
11640     # instance disk type verification
11641     # TODO: Implement export support for file-based disks
11642     for disk in self.instance.disks:
11643       if disk.dev_type == constants.LD_FILE:
11644         raise errors.OpPrereqError("Export not supported for instances with"
11645                                    " file-based disks", errors.ECODE_INVAL)
11646
11647   def _CleanupExports(self, feedback_fn):
11648     """Removes exports of current instance from all other nodes.
11649
11650     If an instance in a cluster with nodes A..D was exported to node C, its
11651     exports will be removed from the nodes A, B and D.
11652
11653     """
11654     assert self.op.mode != constants.EXPORT_MODE_REMOTE
11655
11656     nodelist = self.cfg.GetNodeList()
11657     nodelist.remove(self.dst_node.name)
11658
11659     # on one-node clusters nodelist will be empty after the removal
11660     # if we proceed the backup would be removed because OpBackupQuery
11661     # substitutes an empty list with the full cluster node list.
11662     iname = self.instance.name
11663     if nodelist:
11664       feedback_fn("Removing old exports for instance %s" % iname)
11665       exportlist = self.rpc.call_export_list(nodelist)
11666       for node in exportlist:
11667         if exportlist[node].fail_msg:
11668           continue
11669         if iname in exportlist[node].payload:
11670           msg = self.rpc.call_export_remove(node, iname).fail_msg
11671           if msg:
11672             self.LogWarning("Could not remove older export for instance %s"
11673                             " on node %s: %s", iname, node, msg)
11674
11675   def Exec(self, feedback_fn):
11676     """Export an instance to an image in the cluster.
11677
11678     """
11679     assert self.op.mode in constants.EXPORT_MODES
11680
11681     instance = self.instance
11682     src_node = instance.primary_node
11683
11684     if self.op.shutdown:
11685       # shutdown the instance, but not the disks
11686       feedback_fn("Shutting down instance %s" % instance.name)
11687       result = self.rpc.call_instance_shutdown(src_node, instance,
11688                                                self.op.shutdown_timeout)
11689       # TODO: Maybe ignore failures if ignore_remove_failures is set
11690       result.Raise("Could not shutdown instance %s on"
11691                    " node %s" % (instance.name, src_node))
11692
11693     # set the disks ID correctly since call_instance_start needs the
11694     # correct drbd minor to create the symlinks
11695     for disk in instance.disks:
11696       self.cfg.SetDiskID(disk, src_node)
11697
11698     activate_disks = (not instance.admin_up)
11699
11700     if activate_disks:
11701       # Activate the instance disks if we'exporting a stopped instance
11702       feedback_fn("Activating disks for %s" % instance.name)
11703       _StartInstanceDisks(self, instance, None)
11704
11705     try:
11706       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11707                                                      instance)
11708
11709       helper.CreateSnapshots()
11710       try:
11711         if (self.op.shutdown and instance.admin_up and
11712             not self.op.remove_instance):
11713           assert not activate_disks
11714           feedback_fn("Starting instance %s" % instance.name)
11715           result = self.rpc.call_instance_start(src_node, instance,
11716                                                 None, None, False)
11717           msg = result.fail_msg
11718           if msg:
11719             feedback_fn("Failed to start instance: %s" % msg)
11720             _ShutdownInstanceDisks(self, instance)
11721             raise errors.OpExecError("Could not start instance: %s" % msg)
11722
11723         if self.op.mode == constants.EXPORT_MODE_LOCAL:
11724           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11725         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11726           connect_timeout = constants.RIE_CONNECT_TIMEOUT
11727           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11728
11729           (key_name, _, _) = self.x509_key_name
11730
11731           dest_ca_pem = \
11732             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11733                                             self.dest_x509_ca)
11734
11735           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11736                                                      key_name, dest_ca_pem,
11737                                                      timeouts)
11738       finally:
11739         helper.Cleanup()
11740
11741       # Check for backwards compatibility
11742       assert len(dresults) == len(instance.disks)
11743       assert compat.all(isinstance(i, bool) for i in dresults), \
11744              "Not all results are boolean: %r" % dresults
11745
11746     finally:
11747       if activate_disks:
11748         feedback_fn("Deactivating disks for %s" % instance.name)
11749         _ShutdownInstanceDisks(self, instance)
11750
11751     if not (compat.all(dresults) and fin_resu):
11752       failures = []
11753       if not fin_resu:
11754         failures.append("export finalization")
11755       if not compat.all(dresults):
11756         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11757                                if not dsk)
11758         failures.append("disk export: disk(s) %s" % fdsk)
11759
11760       raise errors.OpExecError("Export failed, errors in %s" %
11761                                utils.CommaJoin(failures))
11762
11763     # At this point, the export was successful, we can cleanup/finish
11764
11765     # Remove instance if requested
11766     if self.op.remove_instance:
11767       feedback_fn("Removing instance %s" % instance.name)
11768       _RemoveInstance(self, feedback_fn, instance,
11769                       self.op.ignore_remove_failures)
11770
11771     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11772       self._CleanupExports(feedback_fn)
11773
11774     return fin_resu, dresults
11775
11776
11777 class LUBackupRemove(NoHooksLU):
11778   """Remove exports related to the named instance.
11779
11780   """
11781   REQ_BGL = False
11782
11783   def ExpandNames(self):
11784     self.needed_locks = {}
11785     # We need all nodes to be locked in order for RemoveExport to work, but we
11786     # don't need to lock the instance itself, as nothing will happen to it (and
11787     # we can remove exports also for a removed instance)
11788     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11789
11790   def Exec(self, feedback_fn):
11791     """Remove any export.
11792
11793     """
11794     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11795     # If the instance was not found we'll try with the name that was passed in.
11796     # This will only work if it was an FQDN, though.
11797     fqdn_warn = False
11798     if not instance_name:
11799       fqdn_warn = True
11800       instance_name = self.op.instance_name
11801
11802     locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11803     exportlist = self.rpc.call_export_list(locked_nodes)
11804     found = False
11805     for node in exportlist:
11806       msg = exportlist[node].fail_msg
11807       if msg:
11808         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11809         continue
11810       if instance_name in exportlist[node].payload:
11811         found = True
11812         result = self.rpc.call_export_remove(node, instance_name)
11813         msg = result.fail_msg
11814         if msg:
11815           logging.error("Could not remove export for instance %s"
11816                         " on node %s: %s", instance_name, node, msg)
11817
11818     if fqdn_warn and not found:
11819       feedback_fn("Export not found. If trying to remove an export belonging"
11820                   " to a deleted instance please use its Fully Qualified"
11821                   " Domain Name.")
11822
11823
11824 class LUGroupAdd(LogicalUnit):
11825   """Logical unit for creating node groups.
11826
11827   """
11828   HPATH = "group-add"
11829   HTYPE = constants.HTYPE_GROUP
11830   REQ_BGL = False
11831
11832   def ExpandNames(self):
11833     # We need the new group's UUID here so that we can create and acquire the
11834     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11835     # that it should not check whether the UUID exists in the configuration.
11836     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11837     self.needed_locks = {}
11838     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11839
11840   def CheckPrereq(self):
11841     """Check prerequisites.
11842
11843     This checks that the given group name is not an existing node group
11844     already.
11845
11846     """
11847     try:
11848       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11849     except errors.OpPrereqError:
11850       pass
11851     else:
11852       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11853                                  " node group (UUID: %s)" %
11854                                  (self.op.group_name, existing_uuid),
11855                                  errors.ECODE_EXISTS)
11856
11857     if self.op.ndparams:
11858       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11859
11860   def BuildHooksEnv(self):
11861     """Build hooks env.
11862
11863     """
11864     return {
11865       "GROUP_NAME": self.op.group_name,
11866       }
11867
11868   def BuildHooksNodes(self):
11869     """Build hooks nodes.
11870
11871     """
11872     mn = self.cfg.GetMasterNode()
11873     return ([mn], [mn])
11874
11875   def Exec(self, feedback_fn):
11876     """Add the node group to the cluster.
11877
11878     """
11879     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11880                                   uuid=self.group_uuid,
11881                                   alloc_policy=self.op.alloc_policy,
11882                                   ndparams=self.op.ndparams)
11883
11884     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11885     del self.remove_locks[locking.LEVEL_NODEGROUP]
11886
11887
11888 class LUGroupAssignNodes(NoHooksLU):
11889   """Logical unit for assigning nodes to groups.
11890
11891   """
11892   REQ_BGL = False
11893
11894   def ExpandNames(self):
11895     # These raise errors.OpPrereqError on their own:
11896     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11897     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11898
11899     # We want to lock all the affected nodes and groups. We have readily
11900     # available the list of nodes, and the *destination* group. To gather the
11901     # list of "source" groups, we need to fetch node information later on.
11902     self.needed_locks = {
11903       locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11904       locking.LEVEL_NODE: self.op.nodes,
11905       }
11906
11907   def DeclareLocks(self, level):
11908     if level == locking.LEVEL_NODEGROUP:
11909       assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11910
11911       # Try to get all affected nodes' groups without having the group or node
11912       # lock yet. Needs verification later in the code flow.
11913       groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11914
11915       self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11916
11917   def CheckPrereq(self):
11918     """Check prerequisites.
11919
11920     """
11921     assert self.needed_locks[locking.LEVEL_NODEGROUP]
11922     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
11923             frozenset(self.op.nodes))
11924
11925     expected_locks = (set([self.group_uuid]) |
11926                       self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11927     actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
11928     if actual_locks != expected_locks:
11929       raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11930                                " current groups are '%s', used to be '%s'" %
11931                                (utils.CommaJoin(expected_locks),
11932                                 utils.CommaJoin(actual_locks)))
11933
11934     self.node_data = self.cfg.GetAllNodesInfo()
11935     self.group = self.cfg.GetNodeGroup(self.group_uuid)
11936     instance_data = self.cfg.GetAllInstancesInfo()
11937
11938     if self.group is None:
11939       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11940                                (self.op.group_name, self.group_uuid))
11941
11942     (new_splits, previous_splits) = \
11943       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11944                                              for node in self.op.nodes],
11945                                             self.node_data, instance_data)
11946
11947     if new_splits:
11948       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11949
11950       if not self.op.force:
11951         raise errors.OpExecError("The following instances get split by this"
11952                                  " change and --force was not given: %s" %
11953                                  fmt_new_splits)
11954       else:
11955         self.LogWarning("This operation will split the following instances: %s",
11956                         fmt_new_splits)
11957
11958         if previous_splits:
11959           self.LogWarning("In addition, these already-split instances continue"
11960                           " to be split across groups: %s",
11961                           utils.CommaJoin(utils.NiceSort(previous_splits)))
11962
11963   def Exec(self, feedback_fn):
11964     """Assign nodes to a new group.
11965
11966     """
11967     for node in self.op.nodes:
11968       self.node_data[node].group = self.group_uuid
11969
11970     # FIXME: Depends on side-effects of modifying the result of
11971     # C{cfg.GetAllNodesInfo}
11972
11973     self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11974
11975   @staticmethod
11976   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11977     """Check for split instances after a node assignment.
11978
11979     This method considers a series of node assignments as an atomic operation,
11980     and returns information about split instances after applying the set of
11981     changes.
11982
11983     In particular, it returns information about newly split instances, and
11984     instances that were already split, and remain so after the change.
11985
11986     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11987     considered.
11988
11989     @type changes: list of (node_name, new_group_uuid) pairs.
11990     @param changes: list of node assignments to consider.
11991     @param node_data: a dict with data for all nodes
11992     @param instance_data: a dict with all instances to consider
11993     @rtype: a two-tuple
11994     @return: a list of instances that were previously okay and result split as a
11995       consequence of this change, and a list of instances that were previously
11996       split and this change does not fix.
11997
11998     """
11999     changed_nodes = dict((node, group) for node, group in changes
12000                          if node_data[node].group != group)
12001
12002     all_split_instances = set()
12003     previously_split_instances = set()
12004
12005     def InstanceNodes(instance):
12006       return [instance.primary_node] + list(instance.secondary_nodes)
12007
12008     for inst in instance_data.values():
12009       if inst.disk_template not in constants.DTS_INT_MIRROR:
12010         continue
12011
12012       instance_nodes = InstanceNodes(inst)
12013
12014       if len(set(node_data[node].group for node in instance_nodes)) > 1:
12015         previously_split_instances.add(inst.name)
12016
12017       if len(set(changed_nodes.get(node, node_data[node].group)
12018                  for node in instance_nodes)) > 1:
12019         all_split_instances.add(inst.name)
12020
12021     return (list(all_split_instances - previously_split_instances),
12022             list(previously_split_instances & all_split_instances))
12023
12024
12025 class _GroupQuery(_QueryBase):
12026   FIELDS = query.GROUP_FIELDS
12027
12028   def ExpandNames(self, lu):
12029     lu.needed_locks = {}
12030
12031     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12032     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12033
12034     if not self.names:
12035       self.wanted = [name_to_uuid[name]
12036                      for name in utils.NiceSort(name_to_uuid.keys())]
12037     else:
12038       # Accept names to be either names or UUIDs.
12039       missing = []
12040       self.wanted = []
12041       all_uuid = frozenset(self._all_groups.keys())
12042
12043       for name in self.names:
12044         if name in all_uuid:
12045           self.wanted.append(name)
12046         elif name in name_to_uuid:
12047           self.wanted.append(name_to_uuid[name])
12048         else:
12049           missing.append(name)
12050
12051       if missing:
12052         raise errors.OpPrereqError("Some groups do not exist: %s" %
12053                                    utils.CommaJoin(missing),
12054                                    errors.ECODE_NOENT)
12055
12056   def DeclareLocks(self, lu, level):
12057     pass
12058
12059   def _GetQueryData(self, lu):
12060     """Computes the list of node groups and their attributes.
12061
12062     """
12063     do_nodes = query.GQ_NODE in self.requested_data
12064     do_instances = query.GQ_INST in self.requested_data
12065
12066     group_to_nodes = None
12067     group_to_instances = None
12068
12069     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12070     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12071     # latter GetAllInstancesInfo() is not enough, for we have to go through
12072     # instance->node. Hence, we will need to process nodes even if we only need
12073     # instance information.
12074     if do_nodes or do_instances:
12075       all_nodes = lu.cfg.GetAllNodesInfo()
12076       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12077       node_to_group = {}
12078
12079       for node in all_nodes.values():
12080         if node.group in group_to_nodes:
12081           group_to_nodes[node.group].append(node.name)
12082           node_to_group[node.name] = node.group
12083
12084       if do_instances:
12085         all_instances = lu.cfg.GetAllInstancesInfo()
12086         group_to_instances = dict((uuid, []) for uuid in self.wanted)
12087
12088         for instance in all_instances.values():
12089           node = instance.primary_node
12090           if node in node_to_group:
12091             group_to_instances[node_to_group[node]].append(instance.name)
12092
12093         if not do_nodes:
12094           # Do not pass on node information if it was not requested.
12095           group_to_nodes = None
12096
12097     return query.GroupQueryData([self._all_groups[uuid]
12098                                  for uuid in self.wanted],
12099                                 group_to_nodes, group_to_instances)
12100
12101
12102 class LUGroupQuery(NoHooksLU):
12103   """Logical unit for querying node groups.
12104
12105   """
12106   REQ_BGL = False
12107
12108   def CheckArguments(self):
12109     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12110                           self.op.output_fields, False)
12111
12112   def ExpandNames(self):
12113     self.gq.ExpandNames(self)
12114
12115   def DeclareLocks(self, level):
12116     self.gq.DeclareLocks(self, level)
12117
12118   def Exec(self, feedback_fn):
12119     return self.gq.OldStyleQuery(self)
12120
12121
12122 class LUGroupSetParams(LogicalUnit):
12123   """Modifies the parameters of a node group.
12124
12125   """
12126   HPATH = "group-modify"
12127   HTYPE = constants.HTYPE_GROUP
12128   REQ_BGL = False
12129
12130   def CheckArguments(self):
12131     all_changes = [
12132       self.op.ndparams,
12133       self.op.alloc_policy,
12134       ]
12135
12136     if all_changes.count(None) == len(all_changes):
12137       raise errors.OpPrereqError("Please pass at least one modification",
12138                                  errors.ECODE_INVAL)
12139
12140   def ExpandNames(self):
12141     # This raises errors.OpPrereqError on its own:
12142     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12143
12144     self.needed_locks = {
12145       locking.LEVEL_NODEGROUP: [self.group_uuid],
12146       }
12147
12148   def CheckPrereq(self):
12149     """Check prerequisites.
12150
12151     """
12152     self.group = self.cfg.GetNodeGroup(self.group_uuid)
12153
12154     if self.group is None:
12155       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12156                                (self.op.group_name, self.group_uuid))
12157
12158     if self.op.ndparams:
12159       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12160       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12161       self.new_ndparams = new_ndparams
12162
12163   def BuildHooksEnv(self):
12164     """Build hooks env.
12165
12166     """
12167     return {
12168       "GROUP_NAME": self.op.group_name,
12169       "NEW_ALLOC_POLICY": self.op.alloc_policy,
12170       }
12171
12172   def BuildHooksNodes(self):
12173     """Build hooks nodes.
12174
12175     """
12176     mn = self.cfg.GetMasterNode()
12177     return ([mn], [mn])
12178
12179   def Exec(self, feedback_fn):
12180     """Modifies the node group.
12181
12182     """
12183     result = []
12184
12185     if self.op.ndparams:
12186       self.group.ndparams = self.new_ndparams
12187       result.append(("ndparams", str(self.group.ndparams)))
12188
12189     if self.op.alloc_policy:
12190       self.group.alloc_policy = self.op.alloc_policy
12191
12192     self.cfg.Update(self.group, feedback_fn)
12193     return result
12194
12195
12196 class LUGroupRemove(LogicalUnit):
12197   HPATH = "group-remove"
12198   HTYPE = constants.HTYPE_GROUP
12199   REQ_BGL = False
12200
12201   def ExpandNames(self):
12202     # This will raises errors.OpPrereqError on its own:
12203     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12204     self.needed_locks = {
12205       locking.LEVEL_NODEGROUP: [self.group_uuid],
12206       }
12207
12208   def CheckPrereq(self):
12209     """Check prerequisites.
12210
12211     This checks that the given group name exists as a node group, that is
12212     empty (i.e., contains no nodes), and that is not the last group of the
12213     cluster.
12214
12215     """
12216     # Verify that the group is empty.
12217     group_nodes = [node.name
12218                    for node in self.cfg.GetAllNodesInfo().values()
12219                    if node.group == self.group_uuid]
12220
12221     if group_nodes:
12222       raise errors.OpPrereqError("Group '%s' not empty, has the following"
12223                                  " nodes: %s" %
12224                                  (self.op.group_name,
12225                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
12226                                  errors.ECODE_STATE)
12227
12228     # Verify the cluster would not be left group-less.
12229     if len(self.cfg.GetNodeGroupList()) == 1:
12230       raise errors.OpPrereqError("Group '%s' is the only group,"
12231                                  " cannot be removed" %
12232                                  self.op.group_name,
12233                                  errors.ECODE_STATE)
12234
12235   def BuildHooksEnv(self):
12236     """Build hooks env.
12237
12238     """
12239     return {
12240       "GROUP_NAME": self.op.group_name,
12241       }
12242
12243   def BuildHooksNodes(self):
12244     """Build hooks nodes.
12245
12246     """
12247     mn = self.cfg.GetMasterNode()
12248     return ([mn], [mn])
12249
12250   def Exec(self, feedback_fn):
12251     """Remove the node group.
12252
12253     """
12254     try:
12255       self.cfg.RemoveNodeGroup(self.group_uuid)
12256     except errors.ConfigurationError:
12257       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12258                                (self.op.group_name, self.group_uuid))
12259
12260     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12261
12262
12263 class LUGroupRename(LogicalUnit):
12264   HPATH = "group-rename"
12265   HTYPE = constants.HTYPE_GROUP
12266   REQ_BGL = False
12267
12268   def ExpandNames(self):
12269     # This raises errors.OpPrereqError on its own:
12270     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12271
12272     self.needed_locks = {
12273       locking.LEVEL_NODEGROUP: [self.group_uuid],
12274       }
12275
12276   def CheckPrereq(self):
12277     """Check prerequisites.
12278
12279     Ensures requested new name is not yet used.
12280
12281     """
12282     try:
12283       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12284     except errors.OpPrereqError:
12285       pass
12286     else:
12287       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12288                                  " node group (UUID: %s)" %
12289                                  (self.op.new_name, new_name_uuid),
12290                                  errors.ECODE_EXISTS)
12291
12292   def BuildHooksEnv(self):
12293     """Build hooks env.
12294
12295     """
12296     return {
12297       "OLD_NAME": self.op.group_name,
12298       "NEW_NAME": self.op.new_name,
12299       }
12300
12301   def BuildHooksNodes(self):
12302     """Build hooks nodes.
12303
12304     """
12305     mn = self.cfg.GetMasterNode()
12306
12307     all_nodes = self.cfg.GetAllNodesInfo()
12308     all_nodes.pop(mn, None)
12309
12310     run_nodes = [mn]
12311     run_nodes.extend(node.name for node in all_nodes.values()
12312                      if node.group == self.group_uuid)
12313
12314     return (run_nodes, run_nodes)
12315
12316   def Exec(self, feedback_fn):
12317     """Rename the node group.
12318
12319     """
12320     group = self.cfg.GetNodeGroup(self.group_uuid)
12321
12322     if group is None:
12323       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12324                                (self.op.group_name, self.group_uuid))
12325
12326     group.name = self.op.new_name
12327     self.cfg.Update(group, feedback_fn)
12328
12329     return self.op.new_name
12330
12331
12332 class LUGroupEvacuate(LogicalUnit):
12333   HPATH = "group-evacuate"
12334   HTYPE = constants.HTYPE_GROUP
12335   REQ_BGL = False
12336
12337   def ExpandNames(self):
12338     # This raises errors.OpPrereqError on its own:
12339     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12340
12341     if self.op.target_groups:
12342       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12343                                   self.op.target_groups)
12344     else:
12345       self.req_target_uuids = []
12346
12347     if self.group_uuid in self.req_target_uuids:
12348       raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12349                                  " as a target group (targets are %s)" %
12350                                  (self.group_uuid,
12351                                   utils.CommaJoin(self.req_target_uuids)),
12352                                  errors.ECODE_INVAL)
12353
12354     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12355
12356     self.share_locks = _ShareAll()
12357     self.needed_locks = {
12358       locking.LEVEL_INSTANCE: [],
12359       locking.LEVEL_NODEGROUP: [],
12360       locking.LEVEL_NODE: [],
12361       }
12362
12363   def DeclareLocks(self, level):
12364     if level == locking.LEVEL_INSTANCE:
12365       assert not self.needed_locks[locking.LEVEL_INSTANCE]
12366
12367       # Lock instances optimistically, needs verification once node and group
12368       # locks have been acquired
12369       self.needed_locks[locking.LEVEL_INSTANCE] = \
12370         self.cfg.GetNodeGroupInstances(self.group_uuid)
12371
12372     elif level == locking.LEVEL_NODEGROUP:
12373       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12374
12375       if self.req_target_uuids:
12376         lock_groups = set([self.group_uuid] + self.req_target_uuids)
12377
12378         # Lock all groups used by instances optimistically; this requires going
12379         # via the node before it's locked, requiring verification later on
12380         lock_groups.update(group_uuid
12381                            for instance_name in
12382                              self.owned_locks(locking.LEVEL_INSTANCE)
12383                            for group_uuid in
12384                              self.cfg.GetInstanceNodeGroups(instance_name))
12385       else:
12386         # No target groups, need to lock all of them
12387         lock_groups = locking.ALL_SET
12388
12389       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12390
12391     elif level == locking.LEVEL_NODE:
12392       # This will only lock the nodes in the group to be evacuated which
12393       # contain actual instances
12394       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12395       self._LockInstancesNodes()
12396
12397       # Lock all nodes in group to be evacuated and target groups
12398       owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12399       assert self.group_uuid in owned_groups
12400       member_nodes = [node_name
12401                       for group in owned_groups
12402                       for node_name in self.cfg.GetNodeGroup(group).members]
12403       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12404
12405   def CheckPrereq(self):
12406     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12407     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12408     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12409
12410     assert owned_groups.issuperset(self.req_target_uuids)
12411     assert self.group_uuid in owned_groups
12412
12413     # Check if locked instances are still correct
12414     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12415
12416     # Get instance information
12417     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12418
12419     # Check if node groups for locked instances are still correct
12420     for instance_name in owned_instances:
12421       inst = self.instances[instance_name]
12422       assert owned_nodes.issuperset(inst.all_nodes), \
12423         "Instance %s's nodes changed while we kept the lock" % instance_name
12424
12425       inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12426                                              owned_groups)
12427
12428       assert self.group_uuid in inst_groups, \
12429         "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12430
12431     if self.req_target_uuids:
12432       # User requested specific target groups
12433       self.target_uuids = self.req_target_uuids
12434     else:
12435       # All groups except the one to be evacuated are potential targets
12436       self.target_uuids = [group_uuid for group_uuid in owned_groups
12437                            if group_uuid != self.group_uuid]
12438
12439       if not self.target_uuids:
12440         raise errors.OpPrereqError("There are no possible target groups",
12441                                    errors.ECODE_INVAL)
12442
12443   def BuildHooksEnv(self):
12444     """Build hooks env.
12445
12446     """
12447     return {
12448       "GROUP_NAME": self.op.group_name,
12449       "TARGET_GROUPS": " ".join(self.target_uuids),
12450       }
12451
12452   def BuildHooksNodes(self):
12453     """Build hooks nodes.
12454
12455     """
12456     mn = self.cfg.GetMasterNode()
12457
12458     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12459
12460     run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12461
12462     return (run_nodes, run_nodes)
12463
12464   def Exec(self, feedback_fn):
12465     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12466
12467     assert self.group_uuid not in self.target_uuids
12468
12469     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12470                      instances=instances, target_groups=self.target_uuids)
12471
12472     ial.Run(self.op.iallocator)
12473
12474     if not ial.success:
12475       raise errors.OpPrereqError("Can't compute group evacuation using"
12476                                  " iallocator '%s': %s" %
12477                                  (self.op.iallocator, ial.info),
12478                                  errors.ECODE_NORES)
12479
12480     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12481
12482     self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12483                  len(jobs), self.op.group_name)
12484
12485     return ResultWithJobs(jobs)
12486
12487
12488 class TagsLU(NoHooksLU): # pylint: disable=W0223
12489   """Generic tags LU.
12490
12491   This is an abstract class which is the parent of all the other tags LUs.
12492
12493   """
12494   def ExpandNames(self):
12495     self.group_uuid = None
12496     self.needed_locks = {}
12497     if self.op.kind == constants.TAG_NODE:
12498       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12499       self.needed_locks[locking.LEVEL_NODE] = self.op.name
12500     elif self.op.kind == constants.TAG_INSTANCE:
12501       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12502       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12503     elif self.op.kind == constants.TAG_NODEGROUP:
12504       self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12505
12506     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12507     # not possible to acquire the BGL based on opcode parameters)
12508
12509   def CheckPrereq(self):
12510     """Check prerequisites.
12511
12512     """
12513     if self.op.kind == constants.TAG_CLUSTER:
12514       self.target = self.cfg.GetClusterInfo()
12515     elif self.op.kind == constants.TAG_NODE:
12516       self.target = self.cfg.GetNodeInfo(self.op.name)
12517     elif self.op.kind == constants.TAG_INSTANCE:
12518       self.target = self.cfg.GetInstanceInfo(self.op.name)
12519     elif self.op.kind == constants.TAG_NODEGROUP:
12520       self.target = self.cfg.GetNodeGroup(self.group_uuid)
12521     else:
12522       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12523                                  str(self.op.kind), errors.ECODE_INVAL)
12524
12525
12526 class LUTagsGet(TagsLU):
12527   """Returns the tags of a given object.
12528
12529   """
12530   REQ_BGL = False
12531
12532   def ExpandNames(self):
12533     TagsLU.ExpandNames(self)
12534
12535     # Share locks as this is only a read operation
12536     self.share_locks = _ShareAll()
12537
12538   def Exec(self, feedback_fn):
12539     """Returns the tag list.
12540
12541     """
12542     return list(self.target.GetTags())
12543
12544
12545 class LUTagsSearch(NoHooksLU):
12546   """Searches the tags for a given pattern.
12547
12548   """
12549   REQ_BGL = False
12550
12551   def ExpandNames(self):
12552     self.needed_locks = {}
12553
12554   def CheckPrereq(self):
12555     """Check prerequisites.
12556
12557     This checks the pattern passed for validity by compiling it.
12558
12559     """
12560     try:
12561       self.re = re.compile(self.op.pattern)
12562     except re.error, err:
12563       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12564                                  (self.op.pattern, err), errors.ECODE_INVAL)
12565
12566   def Exec(self, feedback_fn):
12567     """Returns the tag list.
12568
12569     """
12570     cfg = self.cfg
12571     tgts = [("/cluster", cfg.GetClusterInfo())]
12572     ilist = cfg.GetAllInstancesInfo().values()
12573     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12574     nlist = cfg.GetAllNodesInfo().values()
12575     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12576     tgts.extend(("/nodegroup/%s" % n.name, n)
12577                 for n in cfg.GetAllNodeGroupsInfo().values())
12578     results = []
12579     for path, target in tgts:
12580       for tag in target.GetTags():
12581         if self.re.search(tag):
12582           results.append((path, tag))
12583     return results
12584
12585
12586 class LUTagsSet(TagsLU):
12587   """Sets a tag on a given object.
12588
12589   """
12590   REQ_BGL = False
12591
12592   def CheckPrereq(self):
12593     """Check prerequisites.
12594
12595     This checks the type and length of the tag name and value.
12596
12597     """
12598     TagsLU.CheckPrereq(self)
12599     for tag in self.op.tags:
12600       objects.TaggableObject.ValidateTag(tag)
12601
12602   def Exec(self, feedback_fn):
12603     """Sets the tag.
12604
12605     """
12606     try:
12607       for tag in self.op.tags:
12608         self.target.AddTag(tag)
12609     except errors.TagError, err:
12610       raise errors.OpExecError("Error while setting tag: %s" % str(err))
12611     self.cfg.Update(self.target, feedback_fn)
12612
12613
12614 class LUTagsDel(TagsLU):
12615   """Delete a list of tags from a given object.
12616
12617   """
12618   REQ_BGL = False
12619
12620   def CheckPrereq(self):
12621     """Check prerequisites.
12622
12623     This checks that we have the given tag.
12624
12625     """
12626     TagsLU.CheckPrereq(self)
12627     for tag in self.op.tags:
12628       objects.TaggableObject.ValidateTag(tag)
12629     del_tags = frozenset(self.op.tags)
12630     cur_tags = self.target.GetTags()
12631
12632     diff_tags = del_tags - cur_tags
12633     if diff_tags:
12634       diff_names = ("'%s'" % i for i in sorted(diff_tags))
12635       raise errors.OpPrereqError("Tag(s) %s not found" %
12636                                  (utils.CommaJoin(diff_names), ),
12637                                  errors.ECODE_NOENT)
12638
12639   def Exec(self, feedback_fn):
12640     """Remove the tag from the object.
12641
12642     """
12643     for tag in self.op.tags:
12644       self.target.RemoveTag(tag)
12645     self.cfg.Update(self.target, feedback_fn)
12646
12647
12648 class LUTestDelay(NoHooksLU):
12649   """Sleep for a specified amount of time.
12650
12651   This LU sleeps on the master and/or nodes for a specified amount of
12652   time.
12653
12654   """
12655   REQ_BGL = False
12656
12657   def ExpandNames(self):
12658     """Expand names and set required locks.
12659
12660     This expands the node list, if any.
12661
12662     """
12663     self.needed_locks = {}
12664     if self.op.on_nodes:
12665       # _GetWantedNodes can be used here, but is not always appropriate to use
12666       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12667       # more information.
12668       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12669       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12670
12671   def _TestDelay(self):
12672     """Do the actual sleep.
12673
12674     """
12675     if self.op.on_master:
12676       if not utils.TestDelay(self.op.duration):
12677         raise errors.OpExecError("Error during master delay test")
12678     if self.op.on_nodes:
12679       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12680       for node, node_result in result.items():
12681         node_result.Raise("Failure during rpc call to node %s" % node)
12682
12683   def Exec(self, feedback_fn):
12684     """Execute the test delay opcode, with the wanted repetitions.
12685
12686     """
12687     if self.op.repeat == 0:
12688       self._TestDelay()
12689     else:
12690       top_value = self.op.repeat - 1
12691       for i in range(self.op.repeat):
12692         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12693         self._TestDelay()
12694
12695
12696 class LUTestJqueue(NoHooksLU):
12697   """Utility LU to test some aspects of the job queue.
12698
12699   """
12700   REQ_BGL = False
12701
12702   # Must be lower than default timeout for WaitForJobChange to see whether it
12703   # notices changed jobs
12704   _CLIENT_CONNECT_TIMEOUT = 20.0
12705   _CLIENT_CONFIRM_TIMEOUT = 60.0
12706
12707   @classmethod
12708   def _NotifyUsingSocket(cls, cb, errcls):
12709     """Opens a Unix socket and waits for another program to connect.
12710
12711     @type cb: callable
12712     @param cb: Callback to send socket name to client
12713     @type errcls: class
12714     @param errcls: Exception class to use for errors
12715
12716     """
12717     # Using a temporary directory as there's no easy way to create temporary
12718     # sockets without writing a custom loop around tempfile.mktemp and
12719     # socket.bind
12720     tmpdir = tempfile.mkdtemp()
12721     try:
12722       tmpsock = utils.PathJoin(tmpdir, "sock")
12723
12724       logging.debug("Creating temporary socket at %s", tmpsock)
12725       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12726       try:
12727         sock.bind(tmpsock)
12728         sock.listen(1)
12729
12730         # Send details to client
12731         cb(tmpsock)
12732
12733         # Wait for client to connect before continuing
12734         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12735         try:
12736           (conn, _) = sock.accept()
12737         except socket.error, err:
12738           raise errcls("Client didn't connect in time (%s)" % err)
12739       finally:
12740         sock.close()
12741     finally:
12742       # Remove as soon as client is connected
12743       shutil.rmtree(tmpdir)
12744
12745     # Wait for client to close
12746     try:
12747       try:
12748         # pylint: disable=E1101
12749         # Instance of '_socketobject' has no ... member
12750         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12751         conn.recv(1)
12752       except socket.error, err:
12753         raise errcls("Client failed to confirm notification (%s)" % err)
12754     finally:
12755       conn.close()
12756
12757   def _SendNotification(self, test, arg, sockname):
12758     """Sends a notification to the client.
12759
12760     @type test: string
12761     @param test: Test name
12762     @param arg: Test argument (depends on test)
12763     @type sockname: string
12764     @param sockname: Socket path
12765
12766     """
12767     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12768
12769   def _Notify(self, prereq, test, arg):
12770     """Notifies the client of a test.
12771
12772     @type prereq: bool
12773     @param prereq: Whether this is a prereq-phase test
12774     @type test: string
12775     @param test: Test name
12776     @param arg: Test argument (depends on test)
12777
12778     """
12779     if prereq:
12780       errcls = errors.OpPrereqError
12781     else:
12782       errcls = errors.OpExecError
12783
12784     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12785                                                   test, arg),
12786                                    errcls)
12787
12788   def CheckArguments(self):
12789     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12790     self.expandnames_calls = 0
12791
12792   def ExpandNames(self):
12793     checkargs_calls = getattr(self, "checkargs_calls", 0)
12794     if checkargs_calls < 1:
12795       raise errors.ProgrammerError("CheckArguments was not called")
12796
12797     self.expandnames_calls += 1
12798
12799     if self.op.notify_waitlock:
12800       self._Notify(True, constants.JQT_EXPANDNAMES, None)
12801
12802     self.LogInfo("Expanding names")
12803
12804     # Get lock on master node (just to get a lock, not for a particular reason)
12805     self.needed_locks = {
12806       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12807       }
12808
12809   def Exec(self, feedback_fn):
12810     if self.expandnames_calls < 1:
12811       raise errors.ProgrammerError("ExpandNames was not called")
12812
12813     if self.op.notify_exec:
12814       self._Notify(False, constants.JQT_EXEC, None)
12815
12816     self.LogInfo("Executing")
12817
12818     if self.op.log_messages:
12819       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12820       for idx, msg in enumerate(self.op.log_messages):
12821         self.LogInfo("Sending log message %s", idx + 1)
12822         feedback_fn(constants.JQT_MSGPREFIX + msg)
12823         # Report how many test messages have been sent
12824         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12825
12826     if self.op.fail:
12827       raise errors.OpExecError("Opcode failure was requested")
12828
12829     return True
12830
12831
12832 class IAllocator(object):
12833   """IAllocator framework.
12834
12835   An IAllocator instance has three sets of attributes:
12836     - cfg that is needed to query the cluster
12837     - input data (all members of the _KEYS class attribute are required)
12838     - four buffer attributes (in|out_data|text), that represent the
12839       input (to the external script) in text and data structure format,
12840       and the output from it, again in two formats
12841     - the result variables from the script (success, info, nodes) for
12842       easy usage
12843
12844   """
12845   # pylint: disable=R0902
12846   # lots of instance attributes
12847
12848   def __init__(self, cfg, rpc, mode, **kwargs):
12849     self.cfg = cfg
12850     self.rpc = rpc
12851     # init buffer variables
12852     self.in_text = self.out_text = self.in_data = self.out_data = None
12853     # init all input fields so that pylint is happy
12854     self.mode = mode
12855     self.memory = self.disks = self.disk_template = None
12856     self.os = self.tags = self.nics = self.vcpus = None
12857     self.hypervisor = None
12858     self.relocate_from = None
12859     self.name = None
12860     self.instances = None
12861     self.evac_mode = None
12862     self.target_groups = []
12863     # computed fields
12864     self.required_nodes = None
12865     # init result fields
12866     self.success = self.info = self.result = None
12867
12868     try:
12869       (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12870     except KeyError:
12871       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12872                                    " IAllocator" % self.mode)
12873
12874     keyset = [n for (n, _) in keydata]
12875
12876     for key in kwargs:
12877       if key not in keyset:
12878         raise errors.ProgrammerError("Invalid input parameter '%s' to"
12879                                      " IAllocator" % key)
12880       setattr(self, key, kwargs[key])
12881
12882     for key in keyset:
12883       if key not in kwargs:
12884         raise errors.ProgrammerError("Missing input parameter '%s' to"
12885                                      " IAllocator" % key)
12886     self._BuildInputData(compat.partial(fn, self), keydata)
12887
12888   def _ComputeClusterData(self):
12889     """Compute the generic allocator input data.
12890
12891     This is the data that is independent of the actual operation.
12892
12893     """
12894     cfg = self.cfg
12895     cluster_info = cfg.GetClusterInfo()
12896     # cluster data
12897     data = {
12898       "version": constants.IALLOCATOR_VERSION,
12899       "cluster_name": cfg.GetClusterName(),
12900       "cluster_tags": list(cluster_info.GetTags()),
12901       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12902       # we don't have job IDs
12903       }
12904     ninfo = cfg.GetAllNodesInfo()
12905     iinfo = cfg.GetAllInstancesInfo().values()
12906     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12907
12908     # node data
12909     node_list = [n.name for n in ninfo.values() if n.vm_capable]
12910
12911     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12912       hypervisor_name = self.hypervisor
12913     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12914       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12915     else:
12916       hypervisor_name = cluster_info.enabled_hypervisors[0]
12917
12918     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12919                                         hypervisor_name)
12920     node_iinfo = \
12921       self.rpc.call_all_instances_info(node_list,
12922                                        cluster_info.enabled_hypervisors)
12923
12924     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12925
12926     config_ndata = self._ComputeBasicNodeData(ninfo)
12927     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12928                                                  i_list, config_ndata)
12929     assert len(data["nodes"]) == len(ninfo), \
12930         "Incomplete node data computed"
12931
12932     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12933
12934     self.in_data = data
12935
12936   @staticmethod
12937   def _ComputeNodeGroupData(cfg):
12938     """Compute node groups data.
12939
12940     """
12941     ng = dict((guuid, {
12942       "name": gdata.name,
12943       "alloc_policy": gdata.alloc_policy,
12944       })
12945       for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12946
12947     return ng
12948
12949   @staticmethod
12950   def _ComputeBasicNodeData(node_cfg):
12951     """Compute global node data.
12952
12953     @rtype: dict
12954     @returns: a dict of name: (node dict, node config)
12955
12956     """
12957     # fill in static (config-based) values
12958     node_results = dict((ninfo.name, {
12959       "tags": list(ninfo.GetTags()),
12960       "primary_ip": ninfo.primary_ip,
12961       "secondary_ip": ninfo.secondary_ip,
12962       "offline": ninfo.offline,
12963       "drained": ninfo.drained,
12964       "master_candidate": ninfo.master_candidate,
12965       "group": ninfo.group,
12966       "master_capable": ninfo.master_capable,
12967       "vm_capable": ninfo.vm_capable,
12968       })
12969       for ninfo in node_cfg.values())
12970
12971     return node_results
12972
12973   @staticmethod
12974   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
12975                               node_results):
12976     """Compute global node data.
12977
12978     @param node_results: the basic node structures as filled from the config
12979
12980     """
12981     # make a copy of the current dict
12982     node_results = dict(node_results)
12983     for nname, nresult in node_data.items():
12984       assert nname in node_results, "Missing basic data for node %s" % nname
12985       ninfo = node_cfg[nname]
12986
12987       if not (ninfo.offline or ninfo.drained):
12988         nresult.Raise("Can't get data for node %s" % nname)
12989         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
12990                                 nname)
12991         remote_info = nresult.payload
12992
12993         for attr in ["memory_total", "memory_free", "memory_dom0",
12994                      "vg_size", "vg_free", "cpu_total"]:
12995           if attr not in remote_info:
12996             raise errors.OpExecError("Node '%s' didn't return attribute"
12997                                      " '%s'" % (nname, attr))
12998           if not isinstance(remote_info[attr], int):
12999             raise errors.OpExecError("Node '%s' returned invalid value"
13000                                      " for '%s': %s" %
13001                                      (nname, attr, remote_info[attr]))
13002         # compute memory used by primary instances
13003         i_p_mem = i_p_up_mem = 0
13004         for iinfo, beinfo in i_list:
13005           if iinfo.primary_node == nname:
13006             i_p_mem += beinfo[constants.BE_MEMORY]
13007             if iinfo.name not in node_iinfo[nname].payload:
13008               i_used_mem = 0
13009             else:
13010               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13011             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13012             remote_info["memory_free"] -= max(0, i_mem_diff)
13013
13014             if iinfo.admin_up:
13015               i_p_up_mem += beinfo[constants.BE_MEMORY]
13016
13017         # compute memory used by instances
13018         pnr_dyn = {
13019           "total_memory": remote_info["memory_total"],
13020           "reserved_memory": remote_info["memory_dom0"],
13021           "free_memory": remote_info["memory_free"],
13022           "total_disk": remote_info["vg_size"],
13023           "free_disk": remote_info["vg_free"],
13024           "total_cpus": remote_info["cpu_total"],
13025           "i_pri_memory": i_p_mem,
13026           "i_pri_up_memory": i_p_up_mem,
13027           }
13028         pnr_dyn.update(node_results[nname])
13029         node_results[nname] = pnr_dyn
13030
13031     return node_results
13032
13033   @staticmethod
13034   def _ComputeInstanceData(cluster_info, i_list):
13035     """Compute global instance data.
13036
13037     """
13038     instance_data = {}
13039     for iinfo, beinfo in i_list:
13040       nic_data = []
13041       for nic in iinfo.nics:
13042         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13043         nic_dict = {
13044           "mac": nic.mac,
13045           "ip": nic.ip,
13046           "mode": filled_params[constants.NIC_MODE],
13047           "link": filled_params[constants.NIC_LINK],
13048           }
13049         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13050           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13051         nic_data.append(nic_dict)
13052       pir = {
13053         "tags": list(iinfo.GetTags()),
13054         "admin_up": iinfo.admin_up,
13055         "vcpus": beinfo[constants.BE_VCPUS],
13056         "memory": beinfo[constants.BE_MEMORY],
13057         "os": iinfo.os,
13058         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13059         "nics": nic_data,
13060         "disks": [{constants.IDISK_SIZE: dsk.size,
13061                    constants.IDISK_MODE: dsk.mode}
13062                   for dsk in iinfo.disks],
13063         "disk_template": iinfo.disk_template,
13064         "hypervisor": iinfo.hypervisor,
13065         }
13066       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13067                                                  pir["disks"])
13068       instance_data[iinfo.name] = pir
13069
13070     return instance_data
13071
13072   def _AddNewInstance(self):
13073     """Add new instance data to allocator structure.
13074
13075     This in combination with _AllocatorGetClusterData will create the
13076     correct structure needed as input for the allocator.
13077
13078     The checks for the completeness of the opcode must have already been
13079     done.
13080
13081     """
13082     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13083
13084     if self.disk_template in constants.DTS_INT_MIRROR:
13085       self.required_nodes = 2
13086     else:
13087       self.required_nodes = 1
13088
13089     request = {
13090       "name": self.name,
13091       "disk_template": self.disk_template,
13092       "tags": self.tags,
13093       "os": self.os,
13094       "vcpus": self.vcpus,
13095       "memory": self.memory,
13096       "disks": self.disks,
13097       "disk_space_total": disk_space,
13098       "nics": self.nics,
13099       "required_nodes": self.required_nodes,
13100       "hypervisor": self.hypervisor,
13101       }
13102
13103     return request
13104
13105   def _AddRelocateInstance(self):
13106     """Add relocate instance data to allocator structure.
13107
13108     This in combination with _IAllocatorGetClusterData will create the
13109     correct structure needed as input for the allocator.
13110
13111     The checks for the completeness of the opcode must have already been
13112     done.
13113
13114     """
13115     instance = self.cfg.GetInstanceInfo(self.name)
13116     if instance is None:
13117       raise errors.ProgrammerError("Unknown instance '%s' passed to"
13118                                    " IAllocator" % self.name)
13119
13120     if instance.disk_template not in constants.DTS_MIRRORED:
13121       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13122                                  errors.ECODE_INVAL)
13123
13124     if instance.disk_template in constants.DTS_INT_MIRROR and \
13125         len(instance.secondary_nodes) != 1:
13126       raise errors.OpPrereqError("Instance has not exactly one secondary node",
13127                                  errors.ECODE_STATE)
13128
13129     self.required_nodes = 1
13130     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13131     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13132
13133     request = {
13134       "name": self.name,
13135       "disk_space_total": disk_space,
13136       "required_nodes": self.required_nodes,
13137       "relocate_from": self.relocate_from,
13138       }
13139     return request
13140
13141   def _AddNodeEvacuate(self):
13142     """Get data for node-evacuate requests.
13143
13144     """
13145     return {
13146       "instances": self.instances,
13147       "evac_mode": self.evac_mode,
13148       }
13149
13150   def _AddChangeGroup(self):
13151     """Get data for node-evacuate requests.
13152
13153     """
13154     return {
13155       "instances": self.instances,
13156       "target_groups": self.target_groups,
13157       }
13158
13159   def _BuildInputData(self, fn, keydata):
13160     """Build input data structures.
13161
13162     """
13163     self._ComputeClusterData()
13164
13165     request = fn()
13166     request["type"] = self.mode
13167     for keyname, keytype in keydata:
13168       if keyname not in request:
13169         raise errors.ProgrammerError("Request parameter %s is missing" %
13170                                      keyname)
13171       val = request[keyname]
13172       if not keytype(val):
13173         raise errors.ProgrammerError("Request parameter %s doesn't pass"
13174                                      " validation, value %s, expected"
13175                                      " type %s" % (keyname, val, keytype))
13176     self.in_data["request"] = request
13177
13178     self.in_text = serializer.Dump(self.in_data)
13179
13180   _STRING_LIST = ht.TListOf(ht.TString)
13181   _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13182      # pylint: disable=E1101
13183      # Class '...' has no 'OP_ID' member
13184      "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13185                           opcodes.OpInstanceMigrate.OP_ID,
13186                           opcodes.OpInstanceReplaceDisks.OP_ID])
13187      })))
13188
13189   _NEVAC_MOVED = \
13190     ht.TListOf(ht.TAnd(ht.TIsLength(3),
13191                        ht.TItems([ht.TNonEmptyString,
13192                                   ht.TNonEmptyString,
13193                                   ht.TListOf(ht.TNonEmptyString),
13194                                  ])))
13195   _NEVAC_FAILED = \
13196     ht.TListOf(ht.TAnd(ht.TIsLength(2),
13197                        ht.TItems([ht.TNonEmptyString,
13198                                   ht.TMaybeString,
13199                                  ])))
13200   _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13201                           ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
13202
13203   _MODE_DATA = {
13204     constants.IALLOCATOR_MODE_ALLOC:
13205       (_AddNewInstance,
13206        [
13207         ("name", ht.TString),
13208         ("memory", ht.TInt),
13209         ("disks", ht.TListOf(ht.TDict)),
13210         ("disk_template", ht.TString),
13211         ("os", ht.TString),
13212         ("tags", _STRING_LIST),
13213         ("nics", ht.TListOf(ht.TDict)),
13214         ("vcpus", ht.TInt),
13215         ("hypervisor", ht.TString),
13216         ], ht.TList),
13217     constants.IALLOCATOR_MODE_RELOC:
13218       (_AddRelocateInstance,
13219        [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13220        ht.TList),
13221      constants.IALLOCATOR_MODE_NODE_EVAC:
13222       (_AddNodeEvacuate, [
13223         ("instances", _STRING_LIST),
13224         ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13225         ], _NEVAC_RESULT),
13226      constants.IALLOCATOR_MODE_CHG_GROUP:
13227       (_AddChangeGroup, [
13228         ("instances", _STRING_LIST),
13229         ("target_groups", _STRING_LIST),
13230         ], _NEVAC_RESULT),
13231     }
13232
13233   def Run(self, name, validate=True, call_fn=None):
13234     """Run an instance allocator and return the results.
13235
13236     """
13237     if call_fn is None:
13238       call_fn = self.rpc.call_iallocator_runner
13239
13240     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13241     result.Raise("Failure while running the iallocator script")
13242
13243     self.out_text = result.payload
13244     if validate:
13245       self._ValidateResult()
13246
13247   def _ValidateResult(self):
13248     """Process the allocator results.
13249
13250     This will process and if successful save the result in
13251     self.out_data and the other parameters.
13252
13253     """
13254     try:
13255       rdict = serializer.Load(self.out_text)
13256     except Exception, err:
13257       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13258
13259     if not isinstance(rdict, dict):
13260       raise errors.OpExecError("Can't parse iallocator results: not a dict")
13261
13262     # TODO: remove backwards compatiblity in later versions
13263     if "nodes" in rdict and "result" not in rdict:
13264       rdict["result"] = rdict["nodes"]
13265       del rdict["nodes"]
13266
13267     for key in "success", "info", "result":
13268       if key not in rdict:
13269         raise errors.OpExecError("Can't parse iallocator results:"
13270                                  " missing key '%s'" % key)
13271       setattr(self, key, rdict[key])
13272
13273     if not self._result_check(self.result):
13274       raise errors.OpExecError("Iallocator returned invalid result,"
13275                                " expected %s, got %s" %
13276                                (self._result_check, self.result),
13277                                errors.ECODE_INVAL)
13278
13279     if self.mode == constants.IALLOCATOR_MODE_RELOC:
13280       assert self.relocate_from is not None
13281       assert self.required_nodes == 1
13282
13283       node2group = dict((name, ndata["group"])
13284                         for (name, ndata) in self.in_data["nodes"].items())
13285
13286       fn = compat.partial(self._NodesToGroups, node2group,
13287                           self.in_data["nodegroups"])
13288
13289       instance = self.cfg.GetInstanceInfo(self.name)
13290       request_groups = fn(self.relocate_from + [instance.primary_node])
13291       result_groups = fn(rdict["result"] + [instance.primary_node])
13292
13293       if self.success and not set(result_groups).issubset(request_groups):
13294         raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13295                                  " differ from original groups (%s)" %
13296                                  (utils.CommaJoin(result_groups),
13297                                   utils.CommaJoin(request_groups)))
13298
13299     elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13300       assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13301
13302     self.out_data = rdict
13303
13304   @staticmethod
13305   def _NodesToGroups(node2group, groups, nodes):
13306     """Returns a list of unique group names for a list of nodes.
13307
13308     @type node2group: dict
13309     @param node2group: Map from node name to group UUID
13310     @type groups: dict
13311     @param groups: Group information
13312     @type nodes: list
13313     @param nodes: Node names
13314
13315     """
13316     result = set()
13317
13318     for node in nodes:
13319       try:
13320         group_uuid = node2group[node]
13321       except KeyError:
13322         # Ignore unknown node
13323         pass
13324       else:
13325         try:
13326           group = groups[group_uuid]
13327         except KeyError:
13328           # Can't find group, let's use UUID
13329           group_name = group_uuid
13330         else:
13331           group_name = group["name"]
13332
13333         result.add(group_name)
13334
13335     return sorted(result)
13336
13337
13338 class LUTestAllocator(NoHooksLU):
13339   """Run allocator tests.
13340
13341   This LU runs the allocator tests
13342
13343   """
13344   def CheckPrereq(self):
13345     """Check prerequisites.
13346
13347     This checks the opcode parameters depending on the director and mode test.
13348
13349     """
13350     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13351       for attr in ["memory", "disks", "disk_template",
13352                    "os", "tags", "nics", "vcpus"]:
13353         if not hasattr(self.op, attr):
13354           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13355                                      attr, errors.ECODE_INVAL)
13356       iname = self.cfg.ExpandInstanceName(self.op.name)
13357       if iname is not None:
13358         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13359                                    iname, errors.ECODE_EXISTS)
13360       if not isinstance(self.op.nics, list):
13361         raise errors.OpPrereqError("Invalid parameter 'nics'",
13362                                    errors.ECODE_INVAL)
13363       if not isinstance(self.op.disks, list):
13364         raise errors.OpPrereqError("Invalid parameter 'disks'",
13365                                    errors.ECODE_INVAL)
13366       for row in self.op.disks:
13367         if (not isinstance(row, dict) or
13368             constants.IDISK_SIZE not in row or
13369             not isinstance(row[constants.IDISK_SIZE], int) or
13370             constants.IDISK_MODE not in row or
13371             row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13372           raise errors.OpPrereqError("Invalid contents of the 'disks'"
13373                                      " parameter", errors.ECODE_INVAL)
13374       if self.op.hypervisor is None:
13375         self.op.hypervisor = self.cfg.GetHypervisorType()
13376     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13377       fname = _ExpandInstanceName(self.cfg, self.op.name)
13378       self.op.name = fname
13379       self.relocate_from = \
13380           list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
13381     elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
13382                           constants.IALLOCATOR_MODE_NODE_EVAC):
13383       if not self.op.instances:
13384         raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
13385       self.op.instances = _GetWantedInstances(self, self.op.instances)
13386     else:
13387       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
13388                                  self.op.mode, errors.ECODE_INVAL)
13389
13390     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13391       if self.op.allocator is None:
13392         raise errors.OpPrereqError("Missing allocator name",
13393                                    errors.ECODE_INVAL)
13394     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13395       raise errors.OpPrereqError("Wrong allocator test '%s'" %
13396                                  self.op.direction, errors.ECODE_INVAL)
13397
13398   def Exec(self, feedback_fn):
13399     """Run the allocator test.
13400
13401     """
13402     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13403       ial = IAllocator(self.cfg, self.rpc,
13404                        mode=self.op.mode,
13405                        name=self.op.name,
13406                        memory=self.op.memory,
13407                        disks=self.op.disks,
13408                        disk_template=self.op.disk_template,
13409                        os=self.op.os,
13410                        tags=self.op.tags,
13411                        nics=self.op.nics,
13412                        vcpus=self.op.vcpus,
13413                        hypervisor=self.op.hypervisor,
13414                        )
13415     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13416       ial = IAllocator(self.cfg, self.rpc,
13417                        mode=self.op.mode,
13418                        name=self.op.name,
13419                        relocate_from=list(self.relocate_from),
13420                        )
13421     elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13422       ial = IAllocator(self.cfg, self.rpc,
13423                        mode=self.op.mode,
13424                        instances=self.op.instances,
13425                        target_groups=self.op.target_groups)
13426     elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13427       ial = IAllocator(self.cfg, self.rpc,
13428                        mode=self.op.mode,
13429                        instances=self.op.instances,
13430                        evac_mode=self.op.evac_mode)
13431     else:
13432       raise errors.ProgrammerError("Uncatched mode %s in"
13433                                    " LUTestAllocator.Exec", self.op.mode)
13434
13435     if self.op.direction == constants.IALLOCATOR_DIR_IN:
13436       result = ial.in_text
13437     else:
13438       ial.Run(self.op.allocator, validate=False)
13439       result = ial.out_text
13440     return result
13441
13442
13443 #: Query type implementations
13444 _QUERY_IMPL = {
13445   constants.QR_INSTANCE: _InstanceQuery,
13446   constants.QR_NODE: _NodeQuery,
13447   constants.QR_GROUP: _GroupQuery,
13448   constants.QR_OS: _OsQuery,
13449   }
13450
13451 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
13452
13453
13454 def _GetQueryImplementation(name):
13455   """Returns the implemtnation for a query type.
13456
13457   @param name: Query type, must be one of L{constants.QR_VIA_OP}
13458
13459   """
13460   try:
13461     return _QUERY_IMPL[name]
13462   except KeyError:
13463     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
13464                                errors.ECODE_INVAL)