4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
# C0302: since we have waaaay too many lines in this module
import re
import logging
import copy
import itertools
import OpenSSL

from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
63 import ganeti.masterd.instance # pylint: disable=W0611
67 """Data container for LU results with jobs.
69 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.
75 def __init__(self, jobs, **kwargs):
76 """Initializes this class.
78 Additional return values can be specified as keyword arguments.
    @type jobs: list of lists of L{opcodes.OpCode}
81 @param jobs: A list of lists of opcode objects
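
    Illustrative sketch (not part of the original interface; C{OpTestDelay} is
    only a stand-in opcode here) of an LU handing follow-up work back to the
    job queue from its Exec::

      return ResultWithJobs([[opcodes.OpTestDelay(duration=0)]],
                            other_result="done")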
88 class LogicalUnit(object):
89 """Logical Unit base class.
91 Subclasses must follow these rules:
92 - implement ExpandNames
93 - implement CheckPrereq (except when tasklets are used)
94 - implement Exec (except when tasklets are used)
95 - implement BuildHooksEnv
96 - implement BuildHooksNodes
97 - redefine HPATH and HTYPE
98 - optionally redefine their run requirements:
99 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
101 Note that all commands require root permissions.
103 @ivar dry_run_result: the value (if any) that will be returned to the caller
104 in dry-run mode (signalled by opcode dry_run parameter)
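
  Illustrative skeleton only (the names below are made up; real LUs appear
  later in this module)::

    class LUFooBar(LogicalUnit):
      HPATH = "foo-bar"
      HTYPE = constants.HTYPE_CLUSTER

      def ExpandNames(self):
        self.needed_locks = {}

      def BuildHooksEnv(self):
        return {"OP_TARGET": self.cfg.GetClusterName()}

      def BuildHooksNodes(self):
        return ([], [self.cfg.GetMasterNode()])

      def CheckPrereq(self):
        pass

      def Exec(self, feedback_fn):
        feedback_fn("LUFooBar has nothing to do")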
111 def __init__(self, processor, op, context, rpc):
112 """Constructor for LogicalUnit.
    This needs to be overridden in derived classes in order to check op
    validity.
118 self.proc = processor
120 self.cfg = context.cfg
121 self.glm = context.glm
123 self.owned_locks = context.glm.list_owned
124 self.context = context
126 # Dicts used to declare locking needs to mcpu
127 self.needed_locks = None
128 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
130 self.remove_locks = {}
131 # Used to force good behavior when calling helper functions
132 self.recalculate_locks = {}
134 self.Log = processor.Log # pylint: disable=C0103
135 self.LogWarning = processor.LogWarning # pylint: disable=C0103
136 self.LogInfo = processor.LogInfo # pylint: disable=C0103
137 self.LogStep = processor.LogStep # pylint: disable=C0103
138 # support for dry-run
139 self.dry_run_result = None
140 # support for generic debug attribute
141 if (not hasattr(self.op, "debug_level") or
142 not isinstance(self.op.debug_level, int)):
143 self.op.debug_level = 0
148 # Validate opcode parameters and set defaults
149 self.op.Validate(True)
151 self.CheckArguments()
153 def CheckArguments(self):
154 """Check syntactic validity for the opcode arguments.
    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods need not worry about missing parameters.
171 def ExpandNames(self):
172 """Expand names for this LU.
174 This method is called before starting to execute the opcode, and it should
175 update all the parameters of the opcode to their canonical form (e.g. a
176 short node name must be fully expanded after this method has successfully
177 completed). This way locking, hooks, logging, etc. can work correctly.
179 LUs which implement this method must also populate the self.needed_locks
180 member, as a dict with lock levels as keys, and a list of needed lock names
183 - use an empty dict if you don't need any lock
184 - if you don't need any lock at a particular level omit that level
185 - don't put anything for the BGL level
186 - if you want all locks at a level use locking.ALL_SET as a value
188 If you need to share locks (rather than acquire them exclusively) at one
189 level you can modify self.share_locks, setting a true value (usually 1) for
190 that level. By default locks are not shared.
192 This function can also define a list of tasklets, which then will be
193 executed in order instead of the usual LU-level CheckPrereq and Exec
194 functions, if those are not defined by the LU.
198 # Acquire all nodes and one instance
199 self.needed_locks = {
200 locking.LEVEL_NODE: locking.ALL_SET,
201 locking.LEVEL_INSTANCE: ['instance1.example.com'],
203 # Acquire just two nodes
204 self.needed_locks = {
205 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
208 self.needed_locks = {} # No, you can't leave it to the default value None
211 # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
215 self.needed_locks = {} # Exclusive LUs don't need locks.
217 raise NotImplementedError
219 def DeclareLocks(self, level):
220 """Declare LU locking needs for a level
222 While most LUs can just declare their locking needs at ExpandNames time,
223 sometimes there's the need to calculate some locks after having acquired
224 the ones before. This function is called just before acquiring locks at a
225 particular level, but after acquiring the ones at lower levels, and permits
226 such calculations. It can be used to modify self.needed_locks, and by
227 default it does nothing.
229 This function is only called if you have something already set in
230 self.needed_locks for the level.
232 @param level: Locking level which is going to be locked
233 @type level: member of ganeti.locking.LEVELS
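
    A typical (illustrative) implementation defers to the helper documented
    further below::

      def DeclareLocks(self, level):
        if level == locking.LEVEL_NODE:
          self._LockInstancesNodes()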
237 def CheckPrereq(self):
238 """Check prerequisites for this LU.
240 This method should check that the prerequisites for the execution
241 of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.
245 The method should raise errors.OpPrereqError in case something is
246 not fulfilled. Its return value is ignored.
248 This method should also update all the parameters of the opcode to
249 their canonical form if it hasn't been done by ExpandNames before.
252 if self.tasklets is not None:
253 for (idx, tl) in enumerate(self.tasklets):
254 logging.debug("Checking prerequisites for tasklet %s/%s",
255 idx + 1, len(self.tasklets))
260 def Exec(self, feedback_fn):
263 This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.
268 if self.tasklets is not None:
269 for (idx, tl) in enumerate(self.tasklets):
270 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
273 raise NotImplementedError
275 def BuildHooksEnv(self):
276 """Build hooks environment for this LU.
279 @return: Dictionary containing the environment that will be used for
280 running the hooks for this LU. The keys of the dict must not be prefixed
281 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
282 will extend the environment with additional variables. If no environment
283 should be defined, an empty dictionary should be returned (not C{None}).
284 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
288 raise NotImplementedError
290 def BuildHooksNodes(self):
291 """Build list of nodes to run LU's hooks.
293 @rtype: tuple; (list, list)
294 @return: Tuple containing a list of node names on which the hook
295 should run before the execution and a list of node names on which the
      hook should run after the execution. If there are no nodes, an
      empty list should be returned (not None).
298 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
302 raise NotImplementedError
304 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
305 """Notify the LU about the results of its hooks.
307 This method is called every time a hooks phase is executed, and notifies
308 the Logical Unit about the hooks' result. The LU can then use it to alter
309 its result based on the hooks. By default the method does nothing and the
310 previous result is passed back unchanged but any LU can define it if it
311 wants to use the local cluster hook-scripts somehow.
313 @param phase: one of L{constants.HOOKS_PHASE_POST} or
314 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
315 @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
317 @param lu_result: the previous Exec result this LU had, or None
319 @return: the new Exec result, based on the previous result
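
    Illustrative override (a sketch only)::

      def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
        if phase == constants.HOOKS_PHASE_POST:
          feedback_fn("Post hooks ran for %d node(s)" % len(hook_results))
        return lu_result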
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
325 # pylint: disable=W0613,R0201
328 def _ExpandAndLockInstance(self):
329 """Helper function to expand and lock an instance.
331 Many LUs that work on an instance take its name in self.op.instance_name
332 and need to expand it and then declare the expanded name for locking. This
333 function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.
338 if self.needed_locks is None:
339 self.needed_locks = {}
341 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
342 "_ExpandAndLockInstance called with instance-level locks set"
343 self.op.instance_name = _ExpandInstanceName(self.cfg,
344 self.op.instance_name)
345 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
347 def _LockInstancesNodes(self, primary_only=False):
348 """Helper function to declare instances' nodes for locking.
350 This function should be called after locking one or more instances to lock
351 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
352 with all primary or secondary nodes for instances already locked and
353 present in self.needed_locks[locking.LEVEL_INSTANCE].
355 It should be called from DeclareLocks, and for safety only works if
356 self.recalculate_locks[locking.LEVEL_NODE] is set.
358 In the future it may grow parameters to just lock some instance's nodes, or
359 to just lock primaries or secondary nodes, if needed.
    It should be called in DeclareLocks in a way similar to::
363 if level == locking.LEVEL_NODE:
364 self._LockInstancesNodes()
366 @type primary_only: boolean
367 @param primary_only: only lock primary nodes of locked instances
370 assert locking.LEVEL_NODE in self.recalculate_locks, \
371 "_LockInstancesNodes helper function called with no nodes to recalculate"
    # TODO: check if we've really been called with the instance locks held
375 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
376 # future we might want to have different behaviors depending on the value
377 # of self.recalculate_locks[locking.LEVEL_NODE]
379 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
380 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
381 wanted_nodes.append(instance.primary_node)
383 wanted_nodes.extend(instance.secondary_nodes)
385 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
386 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
387 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
388 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
390 del self.recalculate_locks[locking.LEVEL_NODE]
393 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
394 """Simple LU which runs no hooks.
396 This LU is intended as a parent for other LogicalUnits which will
397 run no hooks, in order to reduce duplicate code.
403 def BuildHooksEnv(self):
404 """Empty BuildHooksEnv for NoHooksLu.
406 This just raises an error.
409 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
411 def BuildHooksNodes(self):
412 """Empty BuildHooksNodes for NoHooksLU.
415 raise AssertionError("BuildHooksNodes called for NoHooksLU")
419 """Tasklet base class.
421 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
422 they can mix legacy code with tasklets. Locking needs to be done in the LU,
423 tasklets know nothing about locks.
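
  Illustrative wiring inside an LU's ExpandNames (a sketch; TLFoo is a made-up
  tasklet class)::

    self.tasklets = [TLFoo(self, name) for name in wanted_names]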
425 Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec
430 def __init__(self, lu):
437 def CheckPrereq(self):
438 """Check prerequisites for this tasklets.
440 This method should check whether the prerequisites for the execution of
441 this tasklet are fulfilled. It can do internode communication, but it
442 should be idempotent - no cluster or system changes are allowed.
444 The method should raise errors.OpPrereqError in case something is not
445 fulfilled. Its return value is ignored.
447 This method should also update all parameters to their canonical form if it
448 hasn't been done before.
453 def Exec(self, feedback_fn):
454 """Execute the tasklet.
456 This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.
461 raise NotImplementedError
465 """Base for query utility classes.
468 #: Attribute holding field definitions
471 def __init__(self, filter_, fields, use_locking):
472 """Initializes this class.
475 self.use_locking = use_locking
477 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
479 self.requested_data = self.query.RequestedData()
480 self.names = self.query.RequestedNames()
482 # Sort only if no names were requested
483 self.sort_by_name = not self.names
485 self.do_locking = None
488 def _GetNames(self, lu, all_names, lock_level):
489 """Helper function to determine names asked for in the query.
493 names = lu.owned_locks(lock_level)
497 if self.wanted == locking.ALL_SET:
498 assert not self.names
499 # caller didn't specify names, so ordering is not important
500 return utils.NiceSort(names)
502 # caller specified names and we must keep the same order
504 assert not self.do_locking or lu.glm.is_owned(lock_level)
506 missing = set(self.wanted).difference(names)
508 raise errors.OpExecError("Some items were removed before retrieving"
509 " their data: %s" % missing)
511 # Return expanded names
514 def ExpandNames(self, lu):
515 """Expand names for this query.
517 See L{LogicalUnit.ExpandNames}.
520 raise NotImplementedError()
522 def DeclareLocks(self, lu, level):
523 """Declare locks for this query.
525 See L{LogicalUnit.DeclareLocks}.
528 raise NotImplementedError()
530 def _GetQueryData(self, lu):
531 """Collects all data for this query.
533 @return: Query data object
536 raise NotImplementedError()
538 def NewStyleQuery(self, lu):
539 """Collect data and execute query.
542 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
543 sort_by_name=self.sort_by_name)
545 def OldStyleQuery(self, lu):
546 """Collect data and execute query.
549 return self.query.OldStyleQuery(self._GetQueryData(lu),
550 sort_by_name=self.sort_by_name)
554 """Returns a dict declaring all lock levels shared.
557 return dict.fromkeys(locking.LEVELS, 1)
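
# Typical (illustrative) use inside an LU's ExpandNames:
#   self.share_locks = _ShareAll()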
560 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
561 """Checks if the owned node groups are still correct for an instance.
563 @type cfg: L{config.ConfigWriter}
564 @param cfg: The cluster configuration
565 @type instance_name: string
566 @param instance_name: Instance name
567 @type owned_groups: set or frozenset
568 @param owned_groups: List of currently owned node groups
571 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
573 if not owned_groups.issuperset(inst_groups):
574 raise errors.OpPrereqError("Instance %s's node groups changed since"
575 " locks were acquired, current groups are"
576 " are '%s', owning groups '%s'; retry the"
579 utils.CommaJoin(inst_groups),
580 utils.CommaJoin(owned_groups)),
586 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
587 """Checks if the instances in a node group are still correct.
589 @type cfg: L{config.ConfigWriter}
590 @param cfg: The cluster configuration
591 @type group_uuid: string
592 @param group_uuid: Node group UUID
593 @type owned_instances: set or frozenset
594 @param owned_instances: List of currently owned instances
597 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
598 if owned_instances != wanted_instances:
599 raise errors.OpPrereqError("Instances in node group '%s' changed since"
600 " locks were acquired, wanted '%s', have '%s';"
601 " retry the operation" %
603 utils.CommaJoin(wanted_instances),
604 utils.CommaJoin(owned_instances)),
607 return wanted_instances
610 def _SupportsOob(cfg, node):
611 """Tells if node supports OOB.
613 @type cfg: L{config.ConfigWriter}
614 @param cfg: The cluster configuration
615 @type node: L{objects.Node}
616 @param node: The node
617 @return: The OOB script if supported or an empty string otherwise
620 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
623 def _GetWantedNodes(lu, nodes):
624 """Returns list of checked and expanded node names.
626 @type lu: L{LogicalUnit}
627 @param lu: the logical unit on whose behalf we execute
629 @param nodes: list of node names or None for all nodes
631 @return: the list of nodes, sorted
632 @raise errors.ProgrammerError: if the nodes parameter is wrong type
636 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
638 return utils.NiceSort(lu.cfg.GetNodeList())
641 def _GetWantedInstances(lu, instances):
642 """Returns list of checked and expanded instance names.
644 @type lu: L{LogicalUnit}
645 @param lu: the logical unit on whose behalf we execute
646 @type instances: list
647 @param instances: list of instance names or None for all instances
649 @return: the list of instances, sorted
650 @raise errors.OpPrereqError: if the instances parameter is wrong type
651 @raise errors.OpPrereqError: if any of the passed instances is not found
655 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
657 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
661 def _GetUpdatedParams(old_params, update_dict,
662 use_default=True, use_none=False):
663 """Return the new version of a parameter dictionary.
665 @type old_params: dict
666 @param old_params: old parameters
667 @type update_dict: dict
668 @param update_dict: dict containing new parameter values, or
669 constants.VALUE_DEFAULT to reset the parameter to its default
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
678 @return: the new parameter dictionary
681 params_copy = copy.deepcopy(old_params)
682 for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
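
# Illustrative behaviour of _GetUpdatedParams (the values are made up):
#   _GetUpdatedParams({"a": 1, "b": 2}, {"a": constants.VALUE_DEFAULT, "c": 3})
#   returns {"b": 2, "c": 3}; "a" is dropped so it reverts to its default.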
694 def _ReleaseLocks(lu, level, names=None, keep=None):
695 """Releases locks owned by an LU.
697 @type lu: L{LogicalUnit}
698 @param level: Lock level
699 @type names: list or None
700 @param names: Names of locks to release
701 @type keep: list or None
702 @param keep: Names of locks to retain
705 assert not (keep is not None and names is not None), \
706 "Only one of the 'names' and the 'keep' parameters can be given"
  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None
  if should_release is not None:
    retain = []
    release = []

    # Determine which locks to release
    for name in lu.owned_locks(level):
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
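
# Illustrative call of _ReleaseLocks: keep only the named node lock and
# release every other node-level lock the LU owns (the lock name used in
# "keep" is purely an example):
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])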
739 def _MapInstanceDisksToNodes(instances):
740 """Creates a map from (node, volume) to instance name.
742 @type instances: list of L{objects.Instance}
743 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
746 return dict(((node, vol), inst.name)
747 for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
752 def _RunPostHook(lu, node_name):
753 """Runs the post-hook for an opcode on a single node.
756 hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
758 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
760 # pylint: disable=W0702
761 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
764 def _CheckOutputFields(static, dynamic, selected):
765 """Checks whether all selected fields are valid.
767 @type static: L{utils.FieldSet}
768 @param static: static fields set
769 @type dynamic: L{utils.FieldSet}
770 @param dynamic: dynamic fields set
777 delta = f.NonMatching(selected)
779 raise errors.OpPrereqError("Unknown output fields selected: %s"
780 % ",".join(delta), errors.ECODE_INVAL)
783 def _CheckGlobalHvParams(params):
784 """Validates that given hypervisor params are not global ones.
  This will ensure that instances don't get customised versions of
  global parameters.
790 used_globals = constants.HVC_GLOBALS.intersection(params)
792 msg = ("The following hypervisor parameters are global and cannot"
793 " be customized at instance level, please modify them at"
794 " cluster level: %s" % utils.CommaJoin(used_globals))
795 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
798 def _CheckNodeOnline(lu, node, msg=None):
799 """Ensure that a given node is online.
801 @param lu: the LU on behalf of which we make the check
802 @param node: the node to check
803 @param msg: if passed, should be a message to replace the default one
804 @raise errors.OpPrereqError: if the node is offline
808 msg = "Can't use offline node"
809 if lu.cfg.GetNodeInfo(node).offline:
810 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
813 def _CheckNodeNotDrained(lu, node):
814 """Ensure that a given node is not drained.
816 @param lu: the LU on behalf of which we make the check
817 @param node: the node to check
818 @raise errors.OpPrereqError: if the node is drained
821 if lu.cfg.GetNodeInfo(node).drained:
822 raise errors.OpPrereqError("Can't use drained node %s" % node,
826 def _CheckNodeVmCapable(lu, node):
827 """Ensure that a given node is vm capable.
829 @param lu: the LU on behalf of which we make the check
830 @param node: the node to check
831 @raise errors.OpPrereqError: if the node is not vm capable
834 if not lu.cfg.GetNodeInfo(node).vm_capable:
835 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
839 def _CheckNodeHasOS(lu, node, os_name, force_variant):
840 """Ensure that a node supports a given OS.
842 @param lu: the LU on behalf of which we make the check
843 @param node: the node to check
844 @param os_name: the OS to query about
845 @param force_variant: whether to ignore variant errors
846 @raise errors.OpPrereqError: if the node is not supporting the OS
849 result = lu.rpc.call_os_get(node, os_name)
850 result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
853 if not force_variant:
854 _CheckOSVariant(result.payload, os_name)
857 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
858 """Ensure that a node has the given secondary ip.
860 @type lu: L{LogicalUnit}
861 @param lu: the LU on behalf of which we make the check
863 @param node: the node to check
864 @type secondary_ip: string
865 @param secondary_ip: the ip to check
866 @type prereq: boolean
867 @param prereq: whether to throw a prerequisite or an execute error
868 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
869 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
872 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
873 result.Raise("Failure checking secondary ip on node %s" % node,
874 prereq=prereq, ecode=errors.ECODE_ENVIRON)
875 if not result.payload:
876 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
877 " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)
884 def _GetClusterDomainSecret():
885 """Reads the cluster domain secret.
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)
892 def _CheckInstanceDown(lu, instance, reason):
893 """Ensure that an instance is not running."""
894 if instance.admin_up:
895 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
896 (instance.name, reason), errors.ECODE_STATE)
898 pnode = instance.primary_node
899 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
900 ins_l.Raise("Can't contact node %s for instance information" % pnode,
901 prereq=True, ecode=errors.ECODE_ENVIRON)
903 if instance.name in ins_l.payload:
904 raise errors.OpPrereqError("Instance %s is running, %s" %
905 (instance.name, reason), errors.ECODE_STATE)
908 def _ExpandItemName(fn, name, kind):
909 """Expand an item name.
911 @param fn: the function to use for expansion
912 @param name: requested item name
913 @param kind: text description ('Node' or 'Instance')
914 @return: the resolved (full) name
915 @raise errors.OpPrereqError: if the item is not found
919 if full_name is None:
920 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
925 def _ExpandNodeName(cfg, name):
926 """Wrapper over L{_ExpandItemName} for nodes."""
927 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
930 def _ExpandInstanceName(cfg, name):
931 """Wrapper over L{_ExpandItemName} for instance."""
932 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
935 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
936 memory, vcpus, nics, disk_template, disks,
937 bep, hvp, hypervisor_name, tags):
938 """Builds instance related env variables for hooks
940 This builds the hook environment from individual variables.
943 @param name: the name of the instance
944 @type primary_node: string
945 @param primary_node: the name of the instance's primary node
946 @type secondary_nodes: list
947 @param secondary_nodes: list of secondary nodes as strings
948 @type os_type: string
949 @param os_type: the name of the instance's OS
950 @type status: boolean
951 @param status: the should_run status of the instance
953 @param memory: the memory size of the instance
955 @param vcpus: the count of VCPUs the instance has
957 @param nics: list of tuples (ip, mac, mode, link) representing
958 the NICs the instance has
959 @type disk_template: string
960 @param disk_template: the disk template of the instance
962 @param disks: the list of (size, mode) pairs
964 @param bep: the backend parameters for the instance
966 @param hvp: the hypervisor parameters for the instance
967 @type hypervisor_name: string
968 @param hypervisor_name: the hypervisor for the instance
970 @param tags: list of instance tags as strings
972 @return: the hook environment for this instance
981 "INSTANCE_NAME": name,
982 "INSTANCE_PRIMARY": primary_node,
983 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
984 "INSTANCE_OS_TYPE": os_type,
985 "INSTANCE_STATUS": str_status,
986 "INSTANCE_MEMORY": memory,
987 "INSTANCE_VCPUS": vcpus,
988 "INSTANCE_DISK_TEMPLATE": disk_template,
989 "INSTANCE_HYPERVISOR": hypervisor_name,
993 nic_count = len(nics)
994 for idx, (ip, mac, mode, link) in enumerate(nics):
997 env["INSTANCE_NIC%d_IP" % idx] = ip
998 env["INSTANCE_NIC%d_MAC" % idx] = mac
999 env["INSTANCE_NIC%d_MODE" % idx] = mode
1000 env["INSTANCE_NIC%d_LINK" % idx] = link
1001 if mode == constants.NIC_MODE_BRIDGED:
1002 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1006 env["INSTANCE_NIC_COUNT"] = nic_count
1009 disk_count = len(disks)
1010 for idx, (size, mode) in enumerate(disks):
1011 env["INSTANCE_DISK%d_SIZE" % idx] = size
1012 env["INSTANCE_DISK%d_MODE" % idx] = mode
1016 env["INSTANCE_DISK_COUNT"] = disk_count
1021 env["INSTANCE_TAGS"] = " ".join(tags)
1023 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1024 for key, value in source.items():
1025 env["INSTANCE_%s_%s" % (kind, key)] = value
1030 def _NICListToTuple(lu, nics):
1031 """Build a list of nic information tuples.
1033 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1034 value in LUInstanceQueryData.
1036 @type lu: L{LogicalUnit}
1037 @param lu: the logical unit on whose behalf we execute
1038 @type nics: list of L{objects.NIC}
1039 @param nics: list of nics to convert to hooks tuples
1043 cluster = lu.cfg.GetClusterInfo()
1047 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1048 mode = filled_params[constants.NIC_MODE]
1049 link = filled_params[constants.NIC_LINK]
1050 hooks_nics.append((ip, mac, mode, link))
1054 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1055 """Builds instance related env variables for hooks from an object.
1057 @type lu: L{LogicalUnit}
1058 @param lu: the logical unit on whose behalf we execute
1059 @type instance: L{objects.Instance}
1060 @param instance: the instance for which we should build the
1062 @type override: dict
1063 @param override: dictionary with key/values that will override
1066 @return: the hook environment dictionary
1069 cluster = lu.cfg.GetClusterInfo()
1070 bep = cluster.FillBE(instance)
1071 hvp = cluster.FillHV(instance)
1073 "name": instance.name,
1074 "primary_node": instance.primary_node,
1075 "secondary_nodes": instance.secondary_nodes,
1076 "os_type": instance.os,
1077 "status": instance.admin_up,
1078 "memory": bep[constants.BE_MEMORY],
1079 "vcpus": bep[constants.BE_VCPUS],
1080 "nics": _NICListToTuple(lu, instance.nics),
1081 "disk_template": instance.disk_template,
1082 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1085 "hypervisor_name": instance.hypervisor,
1086 "tags": instance.tags,
1089 args.update(override)
1090 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1093 def _AdjustCandidatePool(lu, exceptions):
1094 """Adjust the candidate pool after node operations.
1097 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1099 lu.LogInfo("Promoted nodes to master candidate role: %s",
1100 utils.CommaJoin(node.name for node in mod_list))
1101 for name in mod_list:
1102 lu.context.ReaddNode(name)
1103 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1105 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1109 def _DecideSelfPromotion(lu, exceptions=None):
1110 """Decide whether I should promote myself as a master candidate.
1113 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1114 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
1116 mc_should = min(mc_should + 1, cp_size)
1117 return mc_now < mc_should
1120 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1121 """Check that the brigdes needed by a list of nics exist.
1124 cluster = lu.cfg.GetClusterInfo()
1125 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1126 brlist = [params[constants.NIC_LINK] for params in paramslist
1127 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1129 result = lu.rpc.call_bridges_exist(target_node, brlist)
1130 result.Raise("Error checking bridges on destination node '%s'" %
1131 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1134 def _CheckInstanceBridgesExist(lu, instance, node=None):
1135 """Check that the brigdes needed by an instance exist.
1139 node = instance.primary_node
1140 _CheckNicsBridgesExist(lu, instance.nics, node)
1143 def _CheckOSVariant(os_obj, name):
1144 """Check whether an OS name conforms to the os variants specification.
1146 @type os_obj: L{objects.OS}
1147 @param os_obj: OS object to check
1149 @param name: OS name passed by the user, to check for validity
1152 variant = objects.OS.GetVariant(name)
1153 if not os_obj.supported_variants:
1155 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1156 " passed)" % (os_obj.name, variant),
1160 raise errors.OpPrereqError("OS name must include a variant",
1163 if variant not in os_obj.supported_variants:
1164 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1167 def _GetNodeInstancesInner(cfg, fn):
1168 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1171 def _GetNodeInstances(cfg, node_name):
1172 """Returns a list of all primary and secondary instances on a node.
1176 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1179 def _GetNodePrimaryInstances(cfg, node_name):
1180 """Returns primary instances on a node.
1183 return _GetNodeInstancesInner(cfg,
1184 lambda inst: node_name == inst.primary_node)
1187 def _GetNodeSecondaryInstances(cfg, node_name):
1188 """Returns secondary instances on a node.
1191 return _GetNodeInstancesInner(cfg,
1192 lambda inst: node_name in inst.secondary_nodes)
1195 def _GetStorageTypeArgs(cfg, storage_type):
1196 """Returns the arguments for a storage type.
1199 # Special case for file storage
1200 if storage_type == constants.ST_FILE:
1201 # storage.FileStorage wants a list of storage directories
1202 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1207 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1210 for dev in instance.disks:
1211 cfg.SetDiskID(dev, node_name)
1213 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1214 result.Raise("Failed to get disk status from node %s" % node_name,
1215 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1217 for idx, bdev_status in enumerate(result.payload):
1218 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1224 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1225 """Check the sanity of iallocator and node arguments and use the
1226 cluster-wide iallocator if appropriate.
1228 Check that at most one of (iallocator, node) is specified. If none is
1229 specified, then the LU's opcode's iallocator slot is filled with the
1230 cluster-wide default iallocator.
1232 @type iallocator_slot: string
1233 @param iallocator_slot: the name of the opcode iallocator slot
1234 @type node_slot: string
1235 @param node_slot: the name of the opcode target node slot
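
  Illustrative call from an LU's CheckArguments (the slot names are just
  examples)::

    _CheckIAllocatorOrNode(self, "iallocator", "node")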
1238 node = getattr(lu.op, node_slot, None)
1239 iallocator = getattr(lu.op, iallocator_slot, None)
1241 if node is not None and iallocator is not None:
1242 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1244 elif node is None and iallocator is None:
1245 default_iallocator = lu.cfg.GetDefaultIAllocator()
1246 if default_iallocator:
1247 setattr(lu.op, iallocator_slot, default_iallocator)
1249 raise errors.OpPrereqError("No iallocator or node given and no"
1250 " cluster-wide default iallocator found;"
1251 " please specify either an iallocator or a"
1252 " node, or set a cluster-wide default"
1256 def _GetDefaultIAllocator(cfg, iallocator):
1257 """Decides on which iallocator to use.
1259 @type cfg: L{config.ConfigWriter}
1260 @param cfg: Cluster configuration object
1261 @type iallocator: string or None
1262 @param iallocator: Iallocator specified in opcode
1264 @return: Iallocator name
1268 # Use default iallocator
1269 iallocator = cfg.GetDefaultIAllocator()
1272 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1273 " opcode nor as a cluster-wide default",
1279 class LUClusterPostInit(LogicalUnit):
1280 """Logical unit for running hooks after cluster initialization.
1283 HPATH = "cluster-init"
1284 HTYPE = constants.HTYPE_CLUSTER
1286 def BuildHooksEnv(self):
1291 "OP_TARGET": self.cfg.GetClusterName(),
1294 def BuildHooksNodes(self):
1295 """Build hooks nodes.
1298 return ([], [self.cfg.GetMasterNode()])
1300 def Exec(self, feedback_fn):
1307 class LUClusterDestroy(LogicalUnit):
1308 """Logical unit for destroying the cluster.
1311 HPATH = "cluster-destroy"
1312 HTYPE = constants.HTYPE_CLUSTER
1314 def BuildHooksEnv(self):
1319 "OP_TARGET": self.cfg.GetClusterName(),
1322 def BuildHooksNodes(self):
1323 """Build hooks nodes.
1328 def CheckPrereq(self):
1329 """Check prerequisites.
1331 This checks whether the cluster is empty.
1333 Any errors are signaled by raising errors.OpPrereqError.
1336 master = self.cfg.GetMasterNode()
1338 nodelist = self.cfg.GetNodeList()
1339 if len(nodelist) != 1 or nodelist[0] != master:
1340 raise errors.OpPrereqError("There are still %d node(s) in"
1341 " this cluster." % (len(nodelist) - 1),
1343 instancelist = self.cfg.GetInstanceList()
1345 raise errors.OpPrereqError("There are still %d instance(s) in"
1346 " this cluster." % len(instancelist),
1349 def Exec(self, feedback_fn):
1350 """Destroys the cluster.
1353 master = self.cfg.GetMasterNode()
1355 # Run post hooks on master node before it's removed
1356 _RunPostHook(self, master)
1358 result = self.rpc.call_node_stop_master(master, False)
1359 result.Raise("Could not disable the master role")
1364 def _VerifyCertificate(filename):
1365 """Verifies a certificate for L{LUClusterVerifyConfig}.
1367 @type filename: string
1368 @param filename: Path to PEM file
1372 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1373 utils.ReadFile(filename))
1374 except Exception, err: # pylint: disable=W0703
1375 return (LUClusterVerifyConfig.ETYPE_ERROR,
1376 "Failed to load X509 certificate %s: %s" % (filename, err))
1379 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1380 constants.SSL_CERT_EXPIRATION_ERROR)
1383 fnamemsg = "While verifying %s: %s" % (filename, msg)
1388 return (None, fnamemsg)
1389 elif errcode == utils.CERT_WARNING:
1390 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1391 elif errcode == utils.CERT_ERROR:
1392 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1394 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1397 def _GetAllHypervisorParameters(cluster, instances):
1398 """Compute the set of all hypervisor parameters.
1400 @type cluster: L{objects.Cluster}
1401 @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
1403 @param instances: additional instances from which to obtain parameters
1404 @rtype: list of (origin, hypervisor, parameters)
1405 @return: a list with all parameters found, indicating the hypervisor they
1406 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1411 for hv_name in cluster.enabled_hypervisors:
1412 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1414 for os_name, os_hvp in cluster.os_hvp.items():
1415 for hv_name, hv_params in os_hvp.items():
1417 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1418 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1420 # TODO: collapse identical parameter values in a single one
1421 for instance in instances:
1422 if instance.hvparams:
1423 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1424 cluster.FillHV(instance)))
1429 class _VerifyErrors(object):
1430 """Mix-in for cluster/group verify LUs.
1432 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1433 self.op and self._feedback_fn to be available.)
1436 TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"
1440 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1441 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1442 ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1443 ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1444 ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1445 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1446 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1447 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1448 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1449 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1450 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1451 EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1452 ENODEDRBD = (TNODE, "ENODEDRBD")
1453 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1454 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1455 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1456 ENODEHV = (TNODE, "ENODEHV")
1457 ENODELVM = (TNODE, "ENODELVM")
1458 ENODEN1 = (TNODE, "ENODEN1")
1459 ENODENET = (TNODE, "ENODENET")
1460 ENODEOS = (TNODE, "ENODEOS")
1461 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1462 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1463 ENODERPC = (TNODE, "ENODERPC")
1464 ENODESSH = (TNODE, "ENODESSH")
1465 ENODEVERSION = (TNODE, "ENODEVERSION")
1466 ENODESETUP = (TNODE, "ENODESETUP")
1467 ENODETIME = (TNODE, "ENODETIME")
1468 ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1470 ETYPE_FIELD = "code"
1471 ETYPE_ERROR = "ERROR"
1472 ETYPE_WARNING = "WARNING"
1474 def _Error(self, ecode, item, msg, *args, **kwargs):
1475 """Format an error message.
1477 Based on the opcode's error_codes parameter, either format a
1478 parseable error code, or a simpler error string.
1480 This must be called only from Exec and functions called from Exec.
1483 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1485 # first complete the msg
1488 # then format the whole message
1489 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1490 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1496 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1497 # and finally report it via the feedback_fn
1498 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
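
    # Illustrative output: with error_codes set this emits parseable lines
    # such as "ERROR:ENODELVM:node:node1.example.com:<msg>" (itype and etxt
    # come from the (TNODE, "ENODELVM")-style tuples above); otherwise the
    # plainer "ERROR: node node1.example.com: <msg>" form is used.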
1500 def _ErrorIf(self, cond, *args, **kwargs):
1501 """Log an error message if the passed condition is True.
1505 or self.op.debug_simulate_errors) # pylint: disable=E1101
1507 self._Error(*args, **kwargs)
1508 # do not mark the operation as failed for WARN cases only
1509 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1510 self.bad = self.bad or cond
1513 class LUClusterVerify(NoHooksLU):
1514 """Submits all jobs necessary to verify the cluster.
1519 def ExpandNames(self):
1520 self.needed_locks = {}
1522 def Exec(self, feedback_fn):
1525 if self.op.group_name:
1526 groups = [self.op.group_name]
1527 depends_fn = lambda: None
1529 groups = self.cfg.GetNodeGroupList()
1531 # Verify global configuration
1532 jobs.append([opcodes.OpClusterVerifyConfig()])
1534 # Always depend on global verification
1535 depends_fn = lambda: [(-len(jobs), [])]
1537 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1538 depends=depends_fn())]
1539 for group in groups)
1541 # Fix up all parameters
1542 for op in itertools.chain(*jobs): # pylint: disable=W0142
1543 op.debug_simulate_errors = self.op.debug_simulate_errors
1544 op.verbose = self.op.verbose
1545 op.error_codes = self.op.error_codes
1547 op.skip_checks = self.op.skip_checks
1548 except AttributeError:
1549 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1551 return ResultWithJobs(jobs)
1554 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1555 """Verifies the cluster config.
1560 def _VerifyHVP(self, hvp_data):
1561 """Verifies locally the syntax of the hypervisor parameters.
1564 for item, hv_name, hv_params in hvp_data:
1565 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1568 hv_class = hypervisor.GetHypervisor(hv_name)
1569 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1570 hv_class.CheckParameterSyntax(hv_params)
1571 except errors.GenericError, err:
1572 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1574 def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
1577 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1578 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1579 self.all_node_info = self.cfg.GetAllNodesInfo()
1580 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1581 self.needed_locks = {}
1583 def Exec(self, feedback_fn):
1584 """Verify integrity of cluster, performing various test on nodes.
1588 self._feedback_fn = feedback_fn
1590 feedback_fn("* Verifying cluster config")
1592 for msg in self.cfg.VerifyConfig():
1593 self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1595 feedback_fn("* Verifying cluster certificate files")
1597 for cert_filename in constants.ALL_CERT_FILES:
1598 (errcode, msg) = _VerifyCertificate(cert_filename)
1599 self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1601 feedback_fn("* Verifying hypervisor parameters")
1603 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1604 self.all_inst_info.values()))
1606 feedback_fn("* Verifying all nodes belong to an existing group")
1608 # We do this verification here because, should this bogus circumstance
1609 # occur, it would never be caught by VerifyGroup, which only acts on
1610 # nodes/instances reachable from existing node groups.
1612 dangling_nodes = set(node.name for node in self.all_node_info.values()
1613 if node.group not in self.all_group_info)
1615 dangling_instances = {}
1616 no_node_instances = []
1618 for inst in self.all_inst_info.values():
1619 if inst.primary_node in dangling_nodes:
1620 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1621 elif inst.primary_node not in self.all_node_info:
1622 no_node_instances.append(inst.name)
1627 utils.CommaJoin(dangling_instances.get(node.name,
1629 for node in dangling_nodes]
1631 self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1632 "the following nodes (and their instances) belong to a non"
1633 " existing group: %s", utils.CommaJoin(pretty_dangling))
1635 self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1636 "the following instances have a non-existing primary-node:"
1637 " %s", utils.CommaJoin(no_node_instances))
1642 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1643 """Verifies the status of a node group.
1646 HPATH = "cluster-verify"
1647 HTYPE = constants.HTYPE_CLUSTER
1650 _HOOKS_INDENT_RE = re.compile("^", re.M)
1652 class NodeImage(object):
1653 """A class representing the logical and physical status of a node.
1656 @ivar name: the node name to which this object refers
1657 @ivar volumes: a structure as returned from
1658 L{ganeti.backend.GetVolumeList} (runtime)
1659 @ivar instances: a list of running instances (runtime)
1660 @ivar pinst: list of configured primary instances (config)
1661 @ivar sinst: list of configured secondary instances (config)
1662 @ivar sbp: dictionary of {primary-node: list of instances} for all
1663 instances for which this node is secondary (config)
1664 @ivar mfree: free memory, as reported by hypervisor (runtime)
1665 @ivar dfree: free disk, as reported by the node (runtime)
1666 @ivar offline: the offline status (config)
1667 @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1669 not whether the individual keys were correct) (runtime)
1670 @type lvm_fail: boolean
1671 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1672 @type hyp_fail: boolean
1673 @ivar hyp_fail: whether the RPC call didn't return the instance list
1674 @type ghost: boolean
1675 @ivar ghost: whether this is a known node or not (config)
1676 @type os_fail: boolean
1677 @ivar os_fail: whether the RPC call didn't return valid OS data
1679 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1680 @type vm_capable: boolean
1681 @ivar vm_capable: whether the node can host instances
1684 def __init__(self, offline=False, name=None, vm_capable=True):
1693 self.offline = offline
1694 self.vm_capable = vm_capable
1695 self.rpc_fail = False
1696 self.lvm_fail = False
1697 self.hyp_fail = False
1699 self.os_fail = False
1702 def ExpandNames(self):
1703 # This raises errors.OpPrereqError on its own:
1704 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1706 # Get instances in node group; this is unsafe and needs verification later
1707 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1709 self.needed_locks = {
1710 locking.LEVEL_INSTANCE: inst_names,
1711 locking.LEVEL_NODEGROUP: [self.group_uuid],
1712 locking.LEVEL_NODE: [],
1715 self.share_locks = _ShareAll()
1717 def DeclareLocks(self, level):
1718 if level == locking.LEVEL_NODE:
1719 # Get members of node group; this is unsafe and needs verification later
1720 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1722 all_inst_info = self.cfg.GetAllInstancesInfo()
1724 # In Exec(), we warn about mirrored instances that have primary and
1725 # secondary living in separate node groups. To fully verify that
1726 # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
1729 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1730 # Important: access only the instances whose lock is owned
1731 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1732 nodes.update(all_inst_info[inst].secondary_nodes)
1734 self.needed_locks[locking.LEVEL_NODE] = nodes
1736 def CheckPrereq(self):
1737 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1738 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1740 group_nodes = set(self.group_info.members)
1741 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1744 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1746 unlocked_instances = \
1747 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1750 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1751 utils.CommaJoin(unlocked_nodes))
1753 if unlocked_instances:
1754 raise errors.OpPrereqError("Missing lock for instances: %s" %
1755 utils.CommaJoin(unlocked_instances))
1757 self.all_node_info = self.cfg.GetAllNodesInfo()
1758 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1760 self.my_node_names = utils.NiceSort(group_nodes)
1761 self.my_inst_names = utils.NiceSort(group_instances)
1763 self.my_node_info = dict((name, self.all_node_info[name])
1764 for name in self.my_node_names)
1766 self.my_inst_info = dict((name, self.all_inst_info[name])
1767 for name in self.my_inst_names)
1769 # We detect here the nodes that will need the extra RPC calls for verifying
1770 # split LV volumes; they should be locked.
1771 extra_lv_nodes = set()
1773 for inst in self.my_inst_info.values():
1774 if inst.disk_template in constants.DTS_INT_MIRROR:
1775 group = self.my_node_info[inst.primary_node].group
1776 for nname in inst.secondary_nodes:
1777 if self.all_node_info[nname].group != group:
1778 extra_lv_nodes.add(nname)
1780 unlocked_lv_nodes = \
1781 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1783 if unlocked_lv_nodes:
1784 raise errors.OpPrereqError("these nodes could be locked: %s" %
1785 utils.CommaJoin(unlocked_lv_nodes))
1786 self.extra_lv_nodes = list(extra_lv_nodes)
1788 def _VerifyNode(self, ninfo, nresult):
1789 """Perform some basic validation on data returned from a node.
1791 - check the result data structure is well formed and has all the
1793 - check ganeti version
1795 @type ninfo: L{objects.Node}
1796 @param ninfo: the node to check
1797 @param nresult: the results from the node
1799 @return: whether overall this call was successful (and we can expect
      reasonable values in the response)
1804 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1806 # main result, nresult should be a non-empty dict
1807 test = not nresult or not isinstance(nresult, dict)
1808 _ErrorIf(test, self.ENODERPC, node,
1809 "unable to verify node: no data returned")
1813 # compares ganeti version
1814 local_version = constants.PROTOCOL_VERSION
1815 remote_version = nresult.get("version", None)
1816 test = not (remote_version and
1817 isinstance(remote_version, (list, tuple)) and
1818 len(remote_version) == 2)
1819 _ErrorIf(test, self.ENODERPC, node,
1820 "connection to node returned invalid data")
1824 test = local_version != remote_version[0]
1825 _ErrorIf(test, self.ENODEVERSION, node,
1826 "incompatible protocol versions: master %s,"
1827 " node %s", local_version, remote_version[0])
1831 # node seems compatible, we can actually try to look into its results
1833 # full package version
1834 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1835 self.ENODEVERSION, node,
1836 "software version mismatch: master %s, node %s",
1837 constants.RELEASE_VERSION, remote_version[1],
1838 code=self.ETYPE_WARNING)
1840 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1841 if ninfo.vm_capable and isinstance(hyp_result, dict):
1842 for hv_name, hv_result in hyp_result.iteritems():
1843 test = hv_result is not None
1844 _ErrorIf(test, self.ENODEHV, node,
1845 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1847 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1848 if ninfo.vm_capable and isinstance(hvp_result, list):
1849 for item, hv_name, hv_result in hvp_result:
1850 _ErrorIf(True, self.ENODEHV, node,
1851 "hypervisor %s parameter verify failure (source %s): %s",
1852 hv_name, item, hv_result)
1854 test = nresult.get(constants.NV_NODESETUP,
1855 ["Missing NODESETUP results"])
1856 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1861 def _VerifyNodeTime(self, ninfo, nresult,
1862 nvinfo_starttime, nvinfo_endtime):
1863 """Check the node time.
1865 @type ninfo: L{objects.Node}
1866 @param ninfo: the node to check
1867 @param nresult: the remote results for the node
1868 @param nvinfo_starttime: the start time of the RPC call
1869 @param nvinfo_endtime: the end time of the RPC call
1873 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1875 ntime = nresult.get(constants.NV_TIME, None)
1877 ntime_merged = utils.MergeTime(ntime)
1878 except (ValueError, TypeError):
1879 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1882 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1883 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1884 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1885 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1889 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1890 "Node time diverges by at least %s from master node time",
1893 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1894 """Check the node LVM results.
1896 @type ninfo: L{objects.Node}
1897 @param ninfo: the node to check
1898 @param nresult: the remote results for the node
1899 @param vg_name: the configured VG name
1906 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1908 # checks vg existence and size > 20G
1909 vglist = nresult.get(constants.NV_VGLIST, None)
1911 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1913 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1914 constants.MIN_VG_SIZE)
1915 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1918 pvlist = nresult.get(constants.NV_PVLIST, None)
1919 test = pvlist is None
1920 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1922 # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # allocate on)
1925 for _, pvname, owner_vg in pvlist:
1926 test = ":" in pvname
1927 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1928 " '%s' of VG '%s'", pvname, owner_vg)
1930 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1931 """Check the node bridges.
1933 @type ninfo: L{objects.Node}
1934 @param ninfo: the node to check
1935 @param nresult: the remote results for the node
1936 @param bridges: the expected list of bridges
1943 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1945 missing = nresult.get(constants.NV_BRIDGES, None)
1946 test = not isinstance(missing, list)
1947 _ErrorIf(test, self.ENODENET, node,
1948 "did not return valid bridge information")
1950 _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1951 utils.CommaJoin(sorted(missing)))
1953 def _VerifyNodeNetwork(self, ninfo, nresult):
1954 """Check the node network connectivity results.
1956 @type ninfo: L{objects.Node}
1957 @param ninfo: the node to check
1958 @param nresult: the remote results for the node
1962 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1964 test = constants.NV_NODELIST not in nresult
1965 _ErrorIf(test, self.ENODESSH, node,
1966 "node hasn't returned node ssh connectivity data")
1968 if nresult[constants.NV_NODELIST]:
1969 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1970 _ErrorIf(True, self.ENODESSH, node,
1971 "ssh communication with node '%s': %s", a_node, a_msg)
1973 test = constants.NV_NODENETTEST not in nresult
1974 _ErrorIf(test, self.ENODENET, node,
1975 "node hasn't returned node tcp connectivity data")
1977 if nresult[constants.NV_NODENETTEST]:
1978 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1980 _ErrorIf(True, self.ENODENET, node,
1981 "tcp communication with node '%s': %s",
1982 anode, nresult[constants.NV_NODENETTEST][anode])
1984 test = constants.NV_MASTERIP not in nresult
1985 _ErrorIf(test, self.ENODENET, node,
1986 "node hasn't returned node master IP reachability data")
1988 if not nresult[constants.NV_MASTERIP]:
1989 if node == self.master_node:
1990 msg = "the master node cannot reach the master IP (not configured?)"
1992 msg = "cannot reach the master IP"
1993 _ErrorIf(True, self.ENODENET, node, msg)
1995 def _VerifyInstance(self, instance, instanceconfig, node_image,
1997 """Verify an instance.
1999 This function checks to see if the required block devices are
2000 available on the instance's node.
2003 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2004 node_current = instanceconfig.primary_node
2006 node_vol_should = {}
2007 instanceconfig.MapLVsByNode(node_vol_should)
2009 for node in node_vol_should:
2010 n_img = node_image[node]
2011 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2012 # ignore missing volumes on offline or broken nodes
2014 for volume in node_vol_should[node]:
2015 test = volume not in n_img.volumes
2016 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
2017 "volume %s missing on node %s", volume, node)
2019 if instanceconfig.admin_up:
2020 pri_img = node_image[node_current]
2021 test = instance not in pri_img.instances and not pri_img.offline
2022 _ErrorIf(test, self.EINSTANCEDOWN, instance,
2023 "instance not running on its primary node %s",
2026 diskdata = [(nname, success, status, idx)
2027 for (nname, disks) in diskstatus.items()
2028 for idx, (success, status) in enumerate(disks)]
2030 for nname, success, bdev_status, idx in diskdata:
2031 # the 'ghost node' construction in Exec() ensures that we have a
2033 snode = node_image[nname]
2034 bad_snode = snode.ghost or snode.offline
2035 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2036 self.EINSTANCEFAULTYDISK, instance,
2037 "couldn't retrieve status for disk/%s on %s: %s",
2038 idx, nname, bdev_status)
2039 _ErrorIf((instanceconfig.admin_up and success and
2040 bdev_status.ldisk_status == constants.LDS_FAULTY),
2041 self.EINSTANCEFAULTYDISK, instance,
2042 "disk/%s on %s is faulty", idx, nname)
2044 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2045 """Verify if there are any unknown volumes in the cluster.
2047 The .os, .swap and backup volumes are ignored. All other volumes are
2048 reported as unknown.
2050 @type reserved: L{ganeti.utils.FieldSet}
2051 @param reserved: a FieldSet of reserved volume names
2054 for node, n_img in node_image.items():
2055 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2056 # skip non-healthy nodes
2058 for volume in n_img.volumes:
2059 test = ((node not in node_vol_should or
2060 volume not in node_vol_should[node]) and
2061 not reserved.Matches(volume))
2062 self._ErrorIf(test, self.ENODEORPHANLV, node,
2063 "volume %s is unknown", volume)
2065 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2066 """Verify N+1 Memory Resilience.
2068 Check that if one single node dies we can still start all the
2069 instances it was primary for.
2072 cluster_info = self.cfg.GetClusterInfo()
2073 for node, n_img in node_image.items():
2074 # This code checks that every node which is now listed as
2075 # secondary has enough memory to host all instances it is
2076 # supposed to, should a single other node in the cluster fail.
2077 # FIXME: not ready for failover to an arbitrary node
2078 # FIXME: does not support file-backed instances
2079 # WARNING: we currently take into account down instances as well
2080 # as up ones, considering that even if they're down someone
2081 # might want to start them even in the event of a node failure.
2083 # we're skipping offline nodes from the N+1 warning, since
2084 # most likely we don't have good memory information from them;
2085 # we already list instances living on such nodes, and that's
2088 for prinode, instances in n_img.sbp.items():
2090 for instance in instances:
2091 bep = cluster_info.FillBE(instance_cfg[instance])
2092 if bep[constants.BE_AUTO_BALANCE]:
2093 needed_mem += bep[constants.BE_MEMORY]
2094 test = n_img.mfree < needed_mem
2095 self._ErrorIf(test, self.ENODEN1, node,
2096 "not enough memory to accomodate instance failovers"
2097 " should node %s fail (%dMiB needed, %dMiB available)",
2098 prinode, needed_mem, n_img.mfree)
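# Illustrative sketch (not part of the LU): for every node, and for each
# other node (prinode) whose instances would fail over onto it, the loop
# above sums the BE_MEMORY of the auto-balanced instances and compares the
# total against the free memory the hypervisor reported.  Reduced to
# made-up numbers:
#
#   needed_mem = sum([512, 1024, 256])    # -> 1792 MiB needed after failover
#   mfree = 1536                          # MiB reported free on the node
#   n_plus_one_ok = mfree >= needed_mem   # -> False, node is not N+1 safe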
2101 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2102 (files_all, files_all_opt, files_mc, files_vm)):
2103 """Verifies file checksums collected from all nodes.
2105 @param errorif: Callback for reporting errors
2106 @param nodeinfo: List of L{objects.Node} objects
2107 @param master_node: Name of master node
2108 @param all_nvinfo: RPC results
2111 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2112 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2113 "Found file listed in more than one file list"
2115 # Define functions determining which nodes to consider for a file
2118 (files_all_opt, None),
2119 (files_mc, lambda node: (node.master_candidate or
2120 node.name == master_node)),
2121 (files_vm, lambda node: node.vm_capable),
2124 # Build mapping from filename to list of nodes which should have the file
2126 for (files, fn) in files2nodefn:
2128 filenodes = nodeinfo
2130 filenodes = filter(fn, nodeinfo)
2131 nodefiles.update((filename,
2132 frozenset(map(operator.attrgetter("name"), filenodes)))
2133 for filename in files)
2135 assert set(nodefiles) == (files_all | files_all_opt | files_mc | files_vm)
2137 fileinfo = dict((filename, {}) for filename in nodefiles)
2138 ignore_nodes = set()
2140 for node in nodeinfo:
2142 ignore_nodes.add(node.name)
2145 nresult = all_nvinfo[node.name]
2147 if nresult.fail_msg or not nresult.payload:
2150 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2152 test = not (node_files and isinstance(node_files, dict))
2153 errorif(test, cls.ENODEFILECHECK, node.name,
2154 "Node did not return file checksum data")
2156 ignore_nodes.add(node.name)
2159 # Build per-checksum mapping from filename to nodes having it
2160 for (filename, checksum) in node_files.items():
2161 assert filename in nodefiles
2162 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2164 for (filename, checksums) in fileinfo.items():
2165 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2167 # Nodes having the file
2168 with_file = frozenset(node_name
2169 for nodes in fileinfo[filename].values()
2170 for node_name in nodes) - ignore_nodes
2172 expected_nodes = nodefiles[filename] - ignore_nodes
2174 # Nodes missing file
2175 missing_file = expected_nodes - with_file
2177 if filename in files_all_opt:
2179 errorif(missing_file and missing_file != expected_nodes,
2180 cls.ECLUSTERFILECHECK, None,
2181 "File %s is optional, but it must exist on all or no"
2182 " nodes (not found on %s)",
2183 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2185 # Non-optional files
2186 errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2187 "File %s is missing from node(s) %s", filename,
2188 utils.CommaJoin(utils.NiceSort(missing_file)))
2190 # Warn if a node has a file it shouldn't
2191 unexpected = with_file - expected_nodes
2193 cls.ECLUSTERFILECHECK, None,
2194 "File %s should not exist on node(s) %s",
2195 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2197 # See if there are multiple versions of the file
2198 test = len(checksums) > 1
2200 variants = ["variant %s on %s" %
2201 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2202 for (idx, (checksum, nodes)) in
2203 enumerate(sorted(checksums.items()))]
2207 errorif(test, cls.ECLUSTERFILECHECK, None,
2208 "File %s found with %s different checksums (%s)",
2209 filename, len(checksums), "; ".join(variants))
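# Illustrative sketch (not part of the LU): per file, the data gathered
# above is a mapping checksum -> set of node names, so "all good" means a
# single checksum held by exactly the expected nodes.  A condensed check
# over such a mapping could be:
#
#   def _FileConsensus(checksums, expected_nodes):
#     """Return (missing_nodes, unexpected_nodes, n_variants).
#
#     checksums: dict checksum -> set of node names
#     expected_nodes: set of node names that should have the file
#     """
#     with_file = set()
#     for nodes in checksums.values():
#       with_file.update(nodes)
#     return (expected_nodes - with_file,
#             with_file - expected_nodes,
#             len(checksums))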
2211 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2213 """Verifies and the node DRBD status.
2215 @type ninfo: L{objects.Node}
2216 @param ninfo: the node to check
2217 @param nresult: the remote results for the node
2218 @param instanceinfo: the dict of instances
2219 @param drbd_helper: the configured DRBD usermode helper
2220 @param drbd_map: the DRBD map as returned by
2221 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2225 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2228 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2229 test = (helper_result is None)
2230 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2231 "no drbd usermode helper returned")
2233 status, payload = helper_result
2235 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2236 "drbd usermode helper check unsuccessful: %s", payload)
2237 test = status and (payload != drbd_helper)
2238 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2239 "wrong drbd usermode helper: %s", payload)
2241 # compute the DRBD minors
2243 for minor, instance in drbd_map[node].items():
2244 test = instance not in instanceinfo
2245 _ErrorIf(test, self.ECLUSTERCFG, None,
2246 "ghost instance '%s' in temporary DRBD map", instance)
2247 # ghost instance should not be running, but otherwise we
2248 # don't give double warnings (both ghost instance and
2249 # unallocated minor in use)
2251 node_drbd[minor] = (instance, False)
2253 instance = instanceinfo[instance]
2254 node_drbd[minor] = (instance.name, instance.admin_up)
2256 # and now check them
2257 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2258 test = not isinstance(used_minors, (tuple, list))
2259 _ErrorIf(test, self.ENODEDRBD, node,
2260 "cannot parse drbd status file: %s", str(used_minors))
2262 # we cannot check drbd status
2265 for minor, (iname, must_exist) in node_drbd.items():
2266 test = minor not in used_minors and must_exist
2267 _ErrorIf(test, self.ENODEDRBD, node,
2268 "drbd minor %d of instance %s is not active", minor, iname)
2269 for minor in used_minors:
2270 test = minor not in node_drbd
2271 _ErrorIf(test, self.ENODEDRBD, node,
2272 "unallocated drbd minor %d is in use", minor)
2274 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2275 """Builds the node OS structures.
2277 @type ninfo: L{objects.Node}
2278 @param ninfo: the node to check
2279 @param nresult: the remote results for the node
2280 @param nimg: the node image object
2284 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2286 remote_os = nresult.get(constants.NV_OSLIST, None)
2287 test = (not isinstance(remote_os, list) or
2288 not compat.all(isinstance(v, list) and len(v) == 7
2289 for v in remote_os))
2291 _ErrorIf(test, self.ENODEOS, node,
2292 "node hasn't returned valid OS data")
2301 for (name, os_path, status, diagnose,
2302 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2304 if name not in os_dict:
2307 # parameters is a list of lists instead of list of tuples due to
2308 # JSON lacking a real tuple type, fix it:
2309 parameters = [tuple(v) for v in parameters]
2310 os_dict[name].append((os_path, status, diagnose,
2311 set(variants), set(parameters), set(api_ver)))
2313 nimg.oslist = os_dict
2315 def _VerifyNodeOS(self, ninfo, nimg, base):
2316 """Verifies the node OS list.
2318 @type ninfo: L{objects.Node}
2319 @param ninfo: the node to check
2320 @param nimg: the node image object
2321 @param base: the 'template' node we match against (e.g. from the master)
2325 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2327 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2329 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2330 for os_name, os_data in nimg.oslist.items():
2331 assert os_data, "Empty OS status for OS %s?!" % os_name
2332 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2333 _ErrorIf(not f_status, self.ENODEOS, node,
2334 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2335 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2336 "OS '%s' has multiple entries (first one shadows the rest): %s",
2337 os_name, utils.CommaJoin([v[0] for v in os_data]))
2338 # comparisons with the 'base' image
2339 test = os_name not in base.oslist
2340 _ErrorIf(test, self.ENODEOS, node,
2341 "Extra OS %s not present on reference node (%s)",
2345 assert base.oslist[os_name], "Base node has empty OS status?"
2346 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2348 # base OS is invalid, skipping
2350 for kind, a, b in [("API version", f_api, b_api),
2351 ("variants list", f_var, b_var),
2352 ("parameters", beautify_params(f_param),
2353 beautify_params(b_param))]:
2354 _ErrorIf(a != b, self.ENODEOS, node,
2355 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2356 kind, os_name, base.name,
2357 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2359 # check any missing OSes
2360 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2361 _ErrorIf(missing, self.ENODEOS, node,
2362 "OSes present on reference node %s but missing on this node: %s",
2363 base.name, utils.CommaJoin(missing))
2365 def _VerifyOob(self, ninfo, nresult):
2366 """Verifies out of band functionality of a node.
2368 @type ninfo: L{objects.Node}
2369 @param ninfo: the node to check
2370 @param nresult: the remote results for the node
2374 # We just have to verify the paths on master and/or master candidates
2375 # as the oob helper is invoked on the master
2376 if ((ninfo.master_candidate or ninfo.master_capable) and
2377 constants.NV_OOB_PATHS in nresult):
2378 for path_result in nresult[constants.NV_OOB_PATHS]:
2379 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2381 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2382 """Verifies and updates the node volume data.
2384 This function will update a L{NodeImage}'s internal structures
2385 with data from the remote call.
2387 @type ninfo: L{objects.Node}
2388 @param ninfo: the node to check
2389 @param nresult: the remote results for the node
2390 @param nimg: the node image object
2391 @param vg_name: the configured VG name
2395 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2397 nimg.lvm_fail = True
2398 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2401 elif isinstance(lvdata, basestring):
2402 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2403 utils.SafeEncode(lvdata))
2404 elif not isinstance(lvdata, dict):
2405 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2407 nimg.volumes = lvdata
2408 nimg.lvm_fail = False
2410 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2411 """Verifies and updates the node instance list.
2413 If the listing was successful, then updates this node's instance
2414 list. Otherwise, it marks the RPC call as failed for the instance
2417 @type ninfo: L{objects.Node}
2418 @param ninfo: the node to check
2419 @param nresult: the remote results for the node
2420 @param nimg: the node image object
2423 idata = nresult.get(constants.NV_INSTANCELIST, None)
2424 test = not isinstance(idata, list)
2425 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2426 " (instancelist): %s", utils.SafeEncode(str(idata)))
2428 nimg.hyp_fail = True
2430 nimg.instances = idata
2432 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2433 """Verifies and computes a node information map
2435 @type ninfo: L{objects.Node}
2436 @param ninfo: the node to check
2437 @param nresult: the remote results for the node
2438 @param nimg: the node image object
2439 @param vg_name: the configured VG name
2443 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2445 # try to read free memory (from the hypervisor)
2446 hv_info = nresult.get(constants.NV_HVINFO, None)
2447 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2448 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2451 nimg.mfree = int(hv_info["memory_free"])
2452 except (ValueError, TypeError):
2453 _ErrorIf(True, self.ENODERPC, node,
2454 "node returned invalid nodeinfo, check hypervisor")
2456 # FIXME: devise a free space model for file based instances as well
2457 if vg_name is not None:
2458 test = (constants.NV_VGLIST not in nresult or
2459 vg_name not in nresult[constants.NV_VGLIST])
2460 _ErrorIf(test, self.ENODELVM, node,
2461 "node didn't return data for the volume group '%s'"
2462 " - it is either missing or broken", vg_name)
2465 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2466 except (ValueError, TypeError):
2467 _ErrorIf(True, self.ENODERPC, node,
2468 "node returned invalid LVM info, check LVM status")
2470 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2471 """Gets per-disk status information for all instances.
2473 @type nodelist: list of strings
2474 @param nodelist: Node names
2475 @type node_image: dict of (name, L{objects.Node})
2476 @param node_image: Node objects
2477 @type instanceinfo: dict of (name, L{objects.Instance})
2478 @param instanceinfo: Instance objects
2479 @rtype: {instance: {node: [(success, payload)]}}
2480 @return: a dictionary of per-instance dictionaries with nodes as
2481 keys and disk information as values; the disk information is a
2482 list of tuples (success, payload)
2485 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2488 node_disks_devonly = {}
2489 diskless_instances = set()
2490 diskless = constants.DT_DISKLESS
2492 for nname in nodelist:
2493 node_instances = list(itertools.chain(node_image[nname].pinst,
2494 node_image[nname].sinst))
2495 diskless_instances.update(inst for inst in node_instances
2496 if instanceinfo[inst].disk_template == diskless)
2497 disks = [(inst, disk)
2498 for inst in node_instances
2499 for disk in instanceinfo[inst].disks]
2502 # No need to collect data
2505 node_disks[nname] = disks
2507 # Creating copies as SetDiskID below will modify the objects and that can
2508 # lead to incorrect data returned from nodes
2509 devonly = [dev.Copy() for (_, dev) in disks]
2512 self.cfg.SetDiskID(dev, nname)
2514 node_disks_devonly[nname] = devonly
2516 assert len(node_disks) == len(node_disks_devonly)
2518 # Collect data from all nodes with disks
2519 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2522 assert len(result) == len(node_disks)
2526 for (nname, nres) in result.items():
2527 disks = node_disks[nname]
2530 # No data from this node
2531 data = len(disks) * [(False, "node offline")]
2534 _ErrorIf(msg, self.ENODERPC, nname,
2535 "while getting disk information: %s", msg)
2537 # No data from this node
2538 data = len(disks) * [(False, msg)]
2541 for idx, i in enumerate(nres.payload):
2542 if isinstance(i, (tuple, list)) and len(i) == 2:
2545 logging.warning("Invalid result from node %s, entry %d: %s",
2547 data.append((False, "Invalid result from the remote node"))
2549 for ((inst, _), status) in zip(disks, data):
2550 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2552 # Add empty entries for diskless instances.
2553 for inst in diskless_instances:
2554 assert inst not in instdisk
2557 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2558 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2559 compat.all(isinstance(s, (tuple, list)) and
2560 len(s) == 2 for s in statuses)
2561 for inst, nnames in instdisk.items()
2562 for nname, statuses in nnames.items())
2563 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
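# Illustrative sketch (not part of the LU): the returned structure is a
# dict keyed by instance name, then by node name, with one (success,
# payload) tuple per disk; all names and values below are made up:
#
#   instdisk = {
#     "inst1.example.com": {
#       "node1.example.com": [(True, "bdev status disk/0"),
#                             (True, "bdev status disk/1")],
#       "node2.example.com": [(False, "node offline"),
#                             (False, "node offline")],
#     },
#     "diskless1.example.com": {},   # diskless instances get empty entries
#   }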
2568 def _SshNodeSelector(group_uuid, all_nodes):
2569 """Create endless iterators for all potential SSH check hosts.
2572 nodes = [node for node in all_nodes
2573 if (node.group != group_uuid and
2575 keyfunc = operator.attrgetter("group")
2577 return map(itertools.cycle,
2578 [sorted(map(operator.attrgetter("name"), names))
2579 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2583 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2584 """Choose which nodes should talk to which other nodes.
2586 We will make nodes contact all nodes in their group, and one node from
2589 @warning: This algorithm has a known issue if one node group is much
2590 smaller than others (e.g. just one node). In such a case all other
2591 nodes will talk to the single node.
2594 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2595 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2597 return (online_nodes,
2598 dict((name, sorted([i.next() for i in sel]))
2599 for name in online_nodes))
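# Illustrative sketch (not part of the LU): _SshNodeSelector builds one
# endless (cycled) iterator of node names per *other* group, and
# _SelectSshCheckNodes then draws one name from each iterator for every
# online node in this group.  With two other groups (made-up names):
#
#   # cycles: ["a1", "a2"] and ["b1"]
#   # online nodes in this group: ["n1", "n2", "n3"]
#   # -> {"n1": ["a1", "b1"], "n2": ["a2", "b1"], "n3": ["a1", "b1"]}
#
# so every other group is contacted by each node, while the load inside
# those groups rotates.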
2601 def BuildHooksEnv(self):
2604 Cluster-Verify hooks are run only in the post phase; if they fail, their
2605 output is logged in the verify output and the verification fails.
2609 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2612 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2613 for node in self.my_node_info.values())
2617 def BuildHooksNodes(self):
2618 """Build hooks nodes.
2621 return ([], self.my_node_names)
2623 def Exec(self, feedback_fn):
2624 """Verify integrity of the node group, performing various test on nodes.
2627 # This method has too many local variables. pylint: disable=R0914
2628 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2630 if not self.my_node_names:
2632 feedback_fn("* Empty node group, skipping verification")
2636 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2637 verbose = self.op.verbose
2638 self._feedback_fn = feedback_fn
2640 vg_name = self.cfg.GetVGName()
2641 drbd_helper = self.cfg.GetDRBDHelper()
2642 cluster = self.cfg.GetClusterInfo()
2643 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2644 hypervisors = cluster.enabled_hypervisors
2645 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2647 i_non_redundant = [] # Non redundant instances
2648 i_non_a_balanced = [] # Non auto-balanced instances
2649 n_offline = 0 # Count of offline nodes
2650 n_drained = 0 # Count of nodes being drained
2651 node_vol_should = {}
2653 # FIXME: verify OS list
2656 filemap = _ComputeAncillaryFiles(cluster, False)
2658 # do local checksums
2659 master_node = self.master_node = self.cfg.GetMasterNode()
2660 master_ip = self.cfg.GetMasterIP()
2662 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2664 node_verify_param = {
2665 constants.NV_FILELIST:
2666 utils.UniqueSequence(filename
2667 for files in filemap
2668 for filename in files),
2669 constants.NV_NODELIST:
2670 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2671 self.all_node_info.values()),
2672 constants.NV_HYPERVISOR: hypervisors,
2673 constants.NV_HVPARAMS:
2674 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2675 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2676 for node in node_data_list
2677 if not node.offline],
2678 constants.NV_INSTANCELIST: hypervisors,
2679 constants.NV_VERSION: None,
2680 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2681 constants.NV_NODESETUP: None,
2682 constants.NV_TIME: None,
2683 constants.NV_MASTERIP: (master_node, master_ip),
2684 constants.NV_OSLIST: None,
2685 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2688 if vg_name is not None:
2689 node_verify_param[constants.NV_VGLIST] = None
2690 node_verify_param[constants.NV_LVLIST] = vg_name
2691 node_verify_param[constants.NV_PVLIST] = [vg_name]
2692 node_verify_param[constants.NV_DRBDLIST] = None
2695 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2698 # FIXME: this needs to be changed per node-group, not cluster-wide
2700 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2701 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2702 bridges.add(default_nicpp[constants.NIC_LINK])
2703 for instance in self.my_inst_info.values():
2704 for nic in instance.nics:
2705 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2706 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2707 bridges.add(full_nic[constants.NIC_LINK])
2710 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2712 # Build our expected cluster state
2713 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2715 vm_capable=node.vm_capable))
2716 for node in node_data_list)
2720 for node in self.all_node_info.values():
2721 path = _SupportsOob(self.cfg, node)
2722 if path and path not in oob_paths:
2723 oob_paths.append(path)
2726 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2728 for instance in self.my_inst_names:
2729 inst_config = self.my_inst_info[instance]
2731 for nname in inst_config.all_nodes:
2732 if nname not in node_image:
2733 gnode = self.NodeImage(name=nname)
2734 gnode.ghost = (nname not in self.all_node_info)
2735 node_image[nname] = gnode
2737 inst_config.MapLVsByNode(node_vol_should)
2739 pnode = inst_config.primary_node
2740 node_image[pnode].pinst.append(instance)
2742 for snode in inst_config.secondary_nodes:
2743 nimg = node_image[snode]
2744 nimg.sinst.append(instance)
2745 if pnode not in nimg.sbp:
2746 nimg.sbp[pnode] = []
2747 nimg.sbp[pnode].append(instance)
2749 # At this point, we have the in-memory data structures complete,
2750 # except for the runtime information, which we'll gather next
2752 # Due to the way our RPC system works, exact response times cannot be
2753 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2754 # time before and after executing the request, we can at least have a time
2756 nvinfo_starttime = time.time()
2757 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2759 self.cfg.GetClusterName())
2760 nvinfo_endtime = time.time()
2762 if self.extra_lv_nodes and vg_name is not None:
2764 self.rpc.call_node_verify(self.extra_lv_nodes,
2765 {constants.NV_LVLIST: vg_name},
2766 self.cfg.GetClusterName())
2768 extra_lv_nvinfo = {}
2770 all_drbd_map = self.cfg.ComputeDRBDMap()
2772 feedback_fn("* Gathering disk information (%s nodes)" %
2773 len(self.my_node_names))
2774 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2777 feedback_fn("* Verifying configuration file consistency")
2779 # If not all nodes are being checked, we need to make sure the master node
2780 # and a non-checked vm_capable node are in the list.
2781 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2783 vf_nvinfo = all_nvinfo.copy()
2784 vf_node_info = list(self.my_node_info.values())
2785 additional_nodes = []
2786 if master_node not in self.my_node_info:
2787 additional_nodes.append(master_node)
2788 vf_node_info.append(self.all_node_info[master_node])
2789 # Add the first vm_capable node we find which is not included
2790 for node in absent_nodes:
2791 nodeinfo = self.all_node_info[node]
2792 if nodeinfo.vm_capable and not nodeinfo.offline:
2793 additional_nodes.append(node)
2794 vf_node_info.append(self.all_node_info[node])
2796 key = constants.NV_FILELIST
2797 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2798 {key: node_verify_param[key]},
2799 self.cfg.GetClusterName()))
2801 vf_nvinfo = all_nvinfo
2802 vf_node_info = self.my_node_info.values()
2804 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2806 feedback_fn("* Verifying node status")
2810 for node_i in node_data_list:
2812 nimg = node_image[node]
2816 feedback_fn("* Skipping offline node %s" % (node,))
2820 if node == master_node:
2822 elif node_i.master_candidate:
2823 ntype = "master candidate"
2824 elif node_i.drained:
2830 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2832 msg = all_nvinfo[node].fail_msg
2833 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2835 nimg.rpc_fail = True
2838 nresult = all_nvinfo[node].payload
2840 nimg.call_ok = self._VerifyNode(node_i, nresult)
2841 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2842 self._VerifyNodeNetwork(node_i, nresult)
2843 self._VerifyOob(node_i, nresult)
2846 self._VerifyNodeLVM(node_i, nresult, vg_name)
2847 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2850 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2851 self._UpdateNodeInstances(node_i, nresult, nimg)
2852 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2853 self._UpdateNodeOS(node_i, nresult, nimg)
2855 if not nimg.os_fail:
2856 if refos_img is None:
2858 self._VerifyNodeOS(node_i, nimg, refos_img)
2859 self._VerifyNodeBridges(node_i, nresult, bridges)
2861 # Check whether all running instances are primary for the node. (This
2862 # can no longer be done from _VerifyInstance below, since some of the
2863 # wrong instances could be from other node groups.)
2864 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2866 for inst in non_primary_inst:
2867 test = inst in self.all_inst_info
2868 _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2869 "instance should not run on node %s", node_i.name)
2870 _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2871 "node is running unknown instance %s", inst)
2873 for node, result in extra_lv_nvinfo.items():
2874 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2875 node_image[node], vg_name)
2877 feedback_fn("* Verifying instance status")
2878 for instance in self.my_inst_names:
2880 feedback_fn("* Verifying instance %s" % instance)
2881 inst_config = self.my_inst_info[instance]
2882 self._VerifyInstance(instance, inst_config, node_image,
2884 inst_nodes_offline = []
2886 pnode = inst_config.primary_node
2887 pnode_img = node_image[pnode]
2888 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2889 self.ENODERPC, pnode, "instance %s, connection to"
2890 " primary node failed", instance)
2892 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2893 self.EINSTANCEBADNODE, instance,
2894 "instance is marked as running and lives on offline node %s",
2895 inst_config.primary_node)
2897 # If the instance is non-redundant we cannot survive losing its primary
2898 # node, so we are not N+1 compliant. On the other hand we have no disk
2899 # templates with more than one secondary so that situation is not well
2901 # FIXME: does not support file-backed instances
2902 if not inst_config.secondary_nodes:
2903 i_non_redundant.append(instance)
2905 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2906 instance, "instance has multiple secondary nodes: %s",
2907 utils.CommaJoin(inst_config.secondary_nodes),
2908 code=self.ETYPE_WARNING)
2910 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2911 pnode = inst_config.primary_node
2912 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2913 instance_groups = {}
2915 for node in instance_nodes:
2916 instance_groups.setdefault(self.all_node_info[node].group,
2920 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2921 # Sort so that we always list the primary node first.
2922 for group, nodes in sorted(instance_groups.items(),
2923 key=lambda (_, nodes): pnode in nodes,
2926 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2927 instance, "instance has primary and secondary nodes in"
2928 " different groups: %s", utils.CommaJoin(pretty_list),
2929 code=self.ETYPE_WARNING)
2931 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2932 i_non_a_balanced.append(instance)
2934 for snode in inst_config.secondary_nodes:
2935 s_img = node_image[snode]
2936 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2937 "instance %s, connection to secondary node failed", instance)
2940 inst_nodes_offline.append(snode)
2942 # warn that the instance lives on offline nodes
2943 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2944 "instance has offline secondary node(s) %s",
2945 utils.CommaJoin(inst_nodes_offline))
2946 # ... or ghost/non-vm_capable nodes
2947 for node in inst_config.all_nodes:
2948 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2949 "instance lives on ghost node %s", node)
2950 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2951 instance, "instance lives on non-vm_capable node %s", node)
2953 feedback_fn("* Verifying orphan volumes")
2954 reserved = utils.FieldSet(*cluster.reserved_lvs)
2956 # We will get spurious "unknown volume" warnings if any node of this group
2957 # is secondary for an instance whose primary is in another group. To avoid
2958 # them, we find these instances and add their volumes to node_vol_should.
2959 for inst in self.all_inst_info.values():
2960 for secondary in inst.secondary_nodes:
2961 if (secondary in self.my_node_info
2962 and inst.name not in self.my_inst_info):
2963 inst.MapLVsByNode(node_vol_should)
2966 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2968 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2969 feedback_fn("* Verifying N+1 Memory redundancy")
2970 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2972 feedback_fn("* Other Notes")
2974 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2975 % len(i_non_redundant))
2977 if i_non_a_balanced:
2978 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2979 % len(i_non_a_balanced))
2982 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2985 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2989 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2990 """Analyze the post-hooks' result
2992 This method analyses the hook result, handles it, and sends some
2993 nicely-formatted feedback back to the user.
2995 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2996 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2997 @param hooks_results: the results of the multi-node hooks rpc call
2998 @param feedback_fn: function used to send feedback back to the caller
2999 @param lu_result: previous Exec result
3000 @return: the new Exec result, based on the previous result
3004 # We only really run POST phase hooks, only for non-empty groups,
3005 # and are only interested in their results
3006 if not self.my_node_names:
3009 elif phase == constants.HOOKS_PHASE_POST:
3010 # Used to change hooks' output to proper indentation
3011 feedback_fn("* Hooks Results")
3012 assert hooks_results, "invalid result from hooks"
3014 for node_name in hooks_results:
3015 res = hooks_results[node_name]
3017 test = msg and not res.offline
3018 self._ErrorIf(test, self.ENODEHOOKS, node_name,
3019 "Communication failure in hooks execution: %s", msg)
3020 if res.offline or msg:
3021 # No need to investigate payload if node is offline or gave
3024 for script, hkr, output in res.payload:
3025 test = hkr == constants.HKR_FAIL
3026 self._ErrorIf(test, self.ENODEHOOKS, node_name,
3027 "Script %s failed, output:", script)
3029 output = self._HOOKS_INDENT_RE.sub(" ", output)
3030 feedback_fn("%s" % output)
3036 class LUClusterVerifyDisks(NoHooksLU):
3037 """Verifies the cluster disks status.
3042 def ExpandNames(self):
3043 self.share_locks = _ShareAll()
3044 self.needed_locks = {
3045 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3048 def Exec(self, feedback_fn):
3049 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3051 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3052 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3053 for group in group_names])
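# Illustrative sketch (not part of the LU): disk verification is fanned out
# as one independent job per node group rather than being done inline.
# Conceptually:
#
#   groups = ["default", "rack1"]                     # names are made up
#   jobs = [[opcodes.OpGroupVerifyDisks(group_name=g)] for g in groups]
#   return ResultWithJobs(jobs)      # one single-opcode job per group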
3056 class LUGroupVerifyDisks(NoHooksLU):
3057 """Verifies the status of all disks in a node group.
3062 def ExpandNames(self):
3063 # Raises errors.OpPrereqError on its own if group can't be found
3064 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3066 self.share_locks = _ShareAll()
3067 self.needed_locks = {
3068 locking.LEVEL_INSTANCE: [],
3069 locking.LEVEL_NODEGROUP: [],
3070 locking.LEVEL_NODE: [],
3073 def DeclareLocks(self, level):
3074 if level == locking.LEVEL_INSTANCE:
3075 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3077 # Lock instances optimistically, needs verification once node and group
3078 # locks have been acquired
3079 self.needed_locks[locking.LEVEL_INSTANCE] = \
3080 self.cfg.GetNodeGroupInstances(self.group_uuid)
3082 elif level == locking.LEVEL_NODEGROUP:
3083 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3085 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3086 set([self.group_uuid] +
3087 # Lock all groups used by instances optimistically; this requires
3088 # going via the node before it's locked, requiring verification
3091 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3092 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3094 elif level == locking.LEVEL_NODE:
3095 # This will only lock the nodes in the group to be verified which contain
3097 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3098 self._LockInstancesNodes()
3100 # Lock all nodes in group to be verified
3101 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3102 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3103 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3105 def CheckPrereq(self):
3106 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3107 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3108 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3110 assert self.group_uuid in owned_groups
3112 # Check if locked instances are still correct
3113 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3115 # Get instance information
3116 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3118 # Check if node groups for locked instances are still correct
3119 for (instance_name, inst) in self.instances.items():
3120 assert owned_nodes.issuperset(inst.all_nodes), \
3121 "Instance %s's nodes changed while we kept the lock" % instance_name
3123 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3126 assert self.group_uuid in inst_groups, \
3127 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3129 def Exec(self, feedback_fn):
3130 """Verify integrity of cluster disks.
3132 @rtype: tuple of three items
3133 @return: a tuple of (dict of node-to-node_error, list of instances
3134 which need activate-disks, dict of instance: (node, volume) for
3139 res_instances = set()
3142 nv_dict = _MapInstanceDisksToNodes([inst
3143 for inst in self.instances.values()
3147 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3148 set(self.cfg.GetVmCapableNodeList()))
3150 node_lvs = self.rpc.call_lv_list(nodes, [])
3152 for (node, node_res) in node_lvs.items():
3153 if node_res.offline:
3156 msg = node_res.fail_msg
3158 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3159 res_nodes[node] = msg
3162 for lv_name, (_, _, lv_online) in node_res.payload.items():
3163 inst = nv_dict.pop((node, lv_name), None)
3164 if not (lv_online or inst is None):
3165 res_instances.add(inst)
3167 # any leftover items in nv_dict are missing LVs, let's arrange the data
3169 for key, inst in nv_dict.iteritems():
3170 res_missing.setdefault(inst, []).append(key)
3172 return (res_nodes, list(res_instances), res_missing)
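# Illustrative sketch (not part of the LU): nv_dict maps (node, lv_name) to
# the owning instance; every LV reported online is popped from it, so
# whatever is left afterwards is missing and gets grouped back per
# instance.  With made-up names:
#
#   leftover = {("node1", "xenvg/disk0"): "inst1"}
#   res_missing = {}
#   for key, inst in leftover.items():
#     res_missing.setdefault(inst, []).append(key)
#   # -> {"inst1": [("node1", "xenvg/disk0")]}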
3175 class LUClusterRepairDiskSizes(NoHooksLU):
3176 """Verifies the cluster disks sizes.
3181 def ExpandNames(self):
3182 if self.op.instances:
3183 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3184 self.needed_locks = {
3185 locking.LEVEL_NODE: [],
3186 locking.LEVEL_INSTANCE: self.wanted_names,
3188 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3190 self.wanted_names = None
3191 self.needed_locks = {
3192 locking.LEVEL_NODE: locking.ALL_SET,
3193 locking.LEVEL_INSTANCE: locking.ALL_SET,
3195 self.share_locks = _ShareAll()
3197 def DeclareLocks(self, level):
3198 if level == locking.LEVEL_NODE and self.wanted_names is not None:
3199 self._LockInstancesNodes(primary_only=True)
3201 def CheckPrereq(self):
3202 """Check prerequisites.
3204 This only checks the optional instance list against the existing names.
3207 if self.wanted_names is None:
3208 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3210 self.wanted_instances = \
3211 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3213 def _EnsureChildSizes(self, disk):
3214 """Ensure children of the disk have the needed disk size.
3216 This is valid mainly for DRBD8 and fixes an issue where the
3217 children have smaller disk size.
3219 @param disk: an L{ganeti.objects.Disk} object
3222 if disk.dev_type == constants.LD_DRBD8:
3223 assert disk.children, "Empty children for DRBD8?"
3224 fchild = disk.children[0]
3225 mismatch = fchild.size < disk.size
3227 self.LogInfo("Child disk has size %d, parent %d, fixing",
3228 fchild.size, disk.size)
3229 fchild.size = disk.size
3231 # and we recurse on this child only, not on the metadev
3232 return self._EnsureChildSizes(fchild) or mismatch
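# Illustrative sketch (not part of the LU): for a DRBD8 disk the method
# grows the first child (the data LV) to the parent's size and recurses on
# that child only, so nested mismatches are also repaired.  With made-up
# numbers:
#
#   # parent DRBD8 disk: size 10240 MiB
#   # child data LV:     size 10112 MiB -> set to 10240, method returns True
#   # child metadata LV: untouched (not part of the recursion)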
3236 def Exec(self, feedback_fn):
3237 """Verify the size of cluster disks.
3240 # TODO: check child disks too
3241 # TODO: check differences in size between primary/secondary nodes
3243 for instance in self.wanted_instances:
3244 pnode = instance.primary_node
3245 if pnode not in per_node_disks:
3246 per_node_disks[pnode] = []
3247 for idx, disk in enumerate(instance.disks):
3248 per_node_disks[pnode].append((instance, idx, disk))
3251 for node, dskl in per_node_disks.items():
3252 newl = [v[2].Copy() for v in dskl]
3254 self.cfg.SetDiskID(dsk, node)
3255 result = self.rpc.call_blockdev_getsize(node, newl)
3257 self.LogWarning("Failure in blockdev_getsize call to node"
3258 " %s, ignoring", node)
3260 if len(result.payload) != len(dskl):
3261 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3262 " result.payload=%s", node, len(dskl), result.payload)
3263 self.LogWarning("Invalid result from node %s, ignoring node results",
3266 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3268 self.LogWarning("Disk %d of instance %s did not return size"
3269 " information, ignoring", idx, instance.name)
3271 if not isinstance(size, (int, long)):
3272 self.LogWarning("Disk %d of instance %s did not return valid"
3273 " size information, ignoring", idx, instance.name)
3276 if size != disk.size:
3277 self.LogInfo("Disk %d of instance %s has mismatched size,"
3278 " correcting: recorded %d, actual %d", idx,
3279 instance.name, disk.size, size)
3281 self.cfg.Update(instance, feedback_fn)
3282 changed.append((instance.name, idx, size))
3283 if self._EnsureChildSizes(disk):
3284 self.cfg.Update(instance, feedback_fn)
3285 changed.append((instance.name, idx, disk.size))
3289 class LUClusterRename(LogicalUnit):
3290 """Rename the cluster.
3293 HPATH = "cluster-rename"
3294 HTYPE = constants.HTYPE_CLUSTER
3296 def BuildHooksEnv(self):
3301 "OP_TARGET": self.cfg.GetClusterName(),
3302 "NEW_NAME": self.op.name,
3305 def BuildHooksNodes(self):
3306 """Build hooks nodes.
3309 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3311 def CheckPrereq(self):
3312 """Verify that the passed name is a valid one.
3315 hostname = netutils.GetHostname(name=self.op.name,
3316 family=self.cfg.GetPrimaryIPFamily())
3318 new_name = hostname.name
3319 self.ip = new_ip = hostname.ip
3320 old_name = self.cfg.GetClusterName()
3321 old_ip = self.cfg.GetMasterIP()
3322 if new_name == old_name and new_ip == old_ip:
3323 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3324 " cluster has changed",
3326 if new_ip != old_ip:
3327 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3328 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3329 " reachable on the network" %
3330 new_ip, errors.ECODE_NOTUNIQUE)
3332 self.op.name = new_name
3334 def Exec(self, feedback_fn):
3335 """Rename the cluster.
3338 clustername = self.op.name
3341 # shutdown the master IP
3342 master = self.cfg.GetMasterNode()
3343 result = self.rpc.call_node_stop_master(master, False)
3344 result.Raise("Could not disable the master role")
3347 cluster = self.cfg.GetClusterInfo()
3348 cluster.cluster_name = clustername
3349 cluster.master_ip = ip
3350 self.cfg.Update(cluster, feedback_fn)
3352 # update the known hosts file
3353 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3354 node_list = self.cfg.GetOnlineNodeList()
3356 node_list.remove(master)
3359 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3361 result = self.rpc.call_node_start_master(master, False, False)
3362 msg = result.fail_msg
3364 self.LogWarning("Could not re-enable the master role on"
3365 " the master, please restart manually: %s", msg)
3370 class LUClusterSetParams(LogicalUnit):
3371 """Change the parameters of the cluster.
3374 HPATH = "cluster-modify"
3375 HTYPE = constants.HTYPE_CLUSTER
3378 def CheckArguments(self):
3382 if self.op.uid_pool:
3383 uidpool.CheckUidPool(self.op.uid_pool)
3385 if self.op.add_uids:
3386 uidpool.CheckUidPool(self.op.add_uids)
3388 if self.op.remove_uids:
3389 uidpool.CheckUidPool(self.op.remove_uids)
3391 def ExpandNames(self):
3392 # FIXME: in the future maybe other cluster params won't require checking on
3393 # all nodes to be modified.
3394 self.needed_locks = {
3395 locking.LEVEL_NODE: locking.ALL_SET,
3397 self.share_locks[locking.LEVEL_NODE] = 1
3399 def BuildHooksEnv(self):
3404 "OP_TARGET": self.cfg.GetClusterName(),
3405 "NEW_VG_NAME": self.op.vg_name,
3408 def BuildHooksNodes(self):
3409 """Build hooks nodes.
3412 mn = self.cfg.GetMasterNode()
3415 def CheckPrereq(self):
3416 """Check prerequisites.
3418 This checks whether the given params don't conflict and
3419 if the given volume group is valid.
3422 if self.op.vg_name is not None and not self.op.vg_name:
3423 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3424 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3425 " instances exist", errors.ECODE_INVAL)
3427 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3428 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3429 raise errors.OpPrereqError("Cannot disable drbd helper while"
3430 " drbd-based instances exist",
3433 node_list = self.owned_locks(locking.LEVEL_NODE)
3435 # if vg_name not None, checks given volume group on all nodes
3437 vglist = self.rpc.call_vg_list(node_list)
3438 for node in node_list:
3439 msg = vglist[node].fail_msg
3441 # ignoring down node
3442 self.LogWarning("Error while gathering data on node %s"
3443 " (ignoring node): %s", node, msg)
3445 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3447 constants.MIN_VG_SIZE)
3449 raise errors.OpPrereqError("Error on node '%s': %s" %
3450 (node, vgstatus), errors.ECODE_ENVIRON)
3452 if self.op.drbd_helper:
3453 # checks given drbd helper on all nodes
3454 helpers = self.rpc.call_drbd_helper(node_list)
3455 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3457 self.LogInfo("Not checking drbd helper on offline node %s", node)
3459 msg = helpers[node].fail_msg
3461 raise errors.OpPrereqError("Error checking drbd helper on node"
3462 " '%s': %s" % (node, msg),
3463 errors.ECODE_ENVIRON)
3464 node_helper = helpers[node].payload
3465 if node_helper != self.op.drbd_helper:
3466 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3467 (node, node_helper), errors.ECODE_ENVIRON)
3469 self.cluster = cluster = self.cfg.GetClusterInfo()
3470 # validate params changes
3471 if self.op.beparams:
3472 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3473 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3475 if self.op.ndparams:
3476 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3477 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3479 # TODO: we need a more general way to handle resetting
3480 # cluster-level parameters to default values
3481 if self.new_ndparams["oob_program"] == "":
3482 self.new_ndparams["oob_program"] = \
3483 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3485 if self.op.nicparams:
3486 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3487 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3488 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3491 # check all instances for consistency
3492 for instance in self.cfg.GetAllInstancesInfo().values():
3493 for nic_idx, nic in enumerate(instance.nics):
3494 params_copy = copy.deepcopy(nic.nicparams)
3495 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3497 # check parameter syntax
3499 objects.NIC.CheckParameterSyntax(params_filled)
3500 except errors.ConfigurationError, err:
3501 nic_errors.append("Instance %s, nic/%d: %s" %
3502 (instance.name, nic_idx, err))
3504 # if we're moving instances to routed, check that they have an ip
3505 target_mode = params_filled[constants.NIC_MODE]
3506 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3507 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3508 " address" % (instance.name, nic_idx))
3510 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3511 "\n".join(nic_errors))
3513 # hypervisor list/parameters
3514 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3515 if self.op.hvparams:
3516 for hv_name, hv_dict in self.op.hvparams.items():
3517 if hv_name not in self.new_hvparams:
3518 self.new_hvparams[hv_name] = hv_dict
3520 self.new_hvparams[hv_name].update(hv_dict)
3522 # os hypervisor parameters
3523 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3525 for os_name, hvs in self.op.os_hvp.items():
3526 if os_name not in self.new_os_hvp:
3527 self.new_os_hvp[os_name] = hvs
3529 for hv_name, hv_dict in hvs.items():
3530 if hv_name not in self.new_os_hvp[os_name]:
3531 self.new_os_hvp[os_name][hv_name] = hv_dict
3533 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3536 self.new_osp = objects.FillDict(cluster.osparams, {})
3537 if self.op.osparams:
3538 for os_name, osp in self.op.osparams.items():
3539 if os_name not in self.new_osp:
3540 self.new_osp[os_name] = {}
3542 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3545 if not self.new_osp[os_name]:
3546 # we removed all parameters
3547 del self.new_osp[os_name]
3549 # check the parameter validity (remote check)
3550 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3551 os_name, self.new_osp[os_name])
3553 # changes to the hypervisor list
3554 if self.op.enabled_hypervisors is not None:
3555 self.hv_list = self.op.enabled_hypervisors
3556 for hv in self.hv_list:
3557 # if the hypervisor doesn't already exist in the cluster
3558 # hvparams, we initialize it to empty, and then (in both
3559 # cases) we make sure to fill the defaults, as we might not
3560 # have a complete defaults list if the hypervisor wasn't
3562 if hv not in new_hvp:
3564 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3565 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3567 self.hv_list = cluster.enabled_hypervisors
3569 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3570 # either the enabled list has changed, or the parameters have, validate
3571 for hv_name, hv_params in self.new_hvparams.items():
3572 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3573 (self.op.enabled_hypervisors and
3574 hv_name in self.op.enabled_hypervisors)):
3575 # either this is a new hypervisor, or its parameters have changed
3576 hv_class = hypervisor.GetHypervisor(hv_name)
3577 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3578 hv_class.CheckParameterSyntax(hv_params)
3579 _CheckHVParams(self, node_list, hv_name, hv_params)
3582 # no need to check any newly-enabled hypervisors, since the
3583 # defaults have already been checked in the above code-block
3584 for os_name, os_hvp in self.new_os_hvp.items():
3585 for hv_name, hv_params in os_hvp.items():
3586 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3587 # we need to fill in the new os_hvp on top of the actual hv_p
3588 cluster_defaults = self.new_hvparams.get(hv_name, {})
3589 new_osp = objects.FillDict(cluster_defaults, hv_params)
3590 hv_class = hypervisor.GetHypervisor(hv_name)
3591 hv_class.CheckParameterSyntax(new_osp)
3592 _CheckHVParams(self, node_list, hv_name, new_osp)
3594 if self.op.default_iallocator:
3595 alloc_script = utils.FindFile(self.op.default_iallocator,
3596 constants.IALLOCATOR_SEARCH_PATH,
3598 if alloc_script is None:
3599 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3600 " specified" % self.op.default_iallocator,
3603 def Exec(self, feedback_fn):
3604 """Change the parameters of the cluster.
3607 if self.op.vg_name is not None:
3608 new_volume = self.op.vg_name
3611 if new_volume != self.cfg.GetVGName():
3612 self.cfg.SetVGName(new_volume)
3614 feedback_fn("Cluster LVM configuration already in desired"
3615 " state, not changing")
3616 if self.op.drbd_helper is not None:
3617 new_helper = self.op.drbd_helper
3620 if new_helper != self.cfg.GetDRBDHelper():
3621 self.cfg.SetDRBDHelper(new_helper)
3623 feedback_fn("Cluster DRBD helper already in desired state,"
3625 if self.op.hvparams:
3626 self.cluster.hvparams = self.new_hvparams
3628 self.cluster.os_hvp = self.new_os_hvp
3629 if self.op.enabled_hypervisors is not None:
3630 self.cluster.hvparams = self.new_hvparams
3631 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3632 if self.op.beparams:
3633 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3634 if self.op.nicparams:
3635 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3636 if self.op.osparams:
3637 self.cluster.osparams = self.new_osp
3638 if self.op.ndparams:
3639 self.cluster.ndparams = self.new_ndparams
3641 if self.op.candidate_pool_size is not None:
3642 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3643 # we need to update the pool size here, otherwise the save will fail
3644 _AdjustCandidatePool(self, [])
3646 if self.op.maintain_node_health is not None:
3647 self.cluster.maintain_node_health = self.op.maintain_node_health
3649 if self.op.prealloc_wipe_disks is not None:
3650 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3652 if self.op.add_uids is not None:
3653 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3655 if self.op.remove_uids is not None:
3656 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3658 if self.op.uid_pool is not None:
3659 self.cluster.uid_pool = self.op.uid_pool
3661 if self.op.default_iallocator is not None:
3662 self.cluster.default_iallocator = self.op.default_iallocator
3664 if self.op.reserved_lvs is not None:
3665 self.cluster.reserved_lvs = self.op.reserved_lvs
3667 def helper_os(aname, mods, desc):
3669 lst = getattr(self.cluster, aname)
3670 for key, val in mods:
3671 if key == constants.DDM_ADD:
3673 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3676 elif key == constants.DDM_REMOVE:
3680 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3682 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3684 if self.op.hidden_os:
3685 helper_os("hidden_os", self.op.hidden_os, "hidden")
3687 if self.op.blacklisted_os:
3688 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3690 if self.op.master_netdev:
3691 master = self.cfg.GetMasterNode()
3692 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3693 self.cluster.master_netdev)
3694 result = self.rpc.call_node_stop_master(master, False)
3695 result.Raise("Could not disable the master ip")
3696 feedback_fn("Changing master_netdev from %s to %s" %
3697 (self.cluster.master_netdev, self.op.master_netdev))
3698 self.cluster.master_netdev = self.op.master_netdev
3700 self.cfg.Update(self.cluster, feedback_fn)
3702 if self.op.master_netdev:
3703 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3704 self.op.master_netdev)
3705 result = self.rpc.call_node_start_master(master, False, False)
3707 self.LogWarning("Could not re-enable the master ip on"
3708 " the master, please restart manually: %s",
3712 def _UploadHelper(lu, nodes, fname):
3713 """Helper for uploading a file and showing warnings.
3716 if os.path.exists(fname):
3717 result = lu.rpc.call_upload_file(nodes, fname)
3718 for to_node, to_result in result.items():
3719 msg = to_result.fail_msg
3721 msg = ("Copy of file %s to node %s failed: %s" %
3722 (fname, to_node, msg))
3723 lu.proc.LogWarning(msg)
3726 def _ComputeAncillaryFiles(cluster, redist):
3727 """Compute files external to Ganeti which need to be consistent.
3729 @type redist: boolean
3730 @param redist: Whether to include files which need to be redistributed
3733 # Compute files for all nodes
3735 constants.SSH_KNOWN_HOSTS_FILE,
3736 constants.CONFD_HMAC_KEY,
3737 constants.CLUSTER_DOMAIN_SECRET_FILE,
3741 files_all.update(constants.ALL_CERT_FILES)
3742 files_all.update(ssconf.SimpleStore().GetFileList())
3744 # we need to ship at least the RAPI certificate
3745 files_all.add(constants.RAPI_CERT_FILE)
3747 if cluster.modify_etc_hosts:
3748 files_all.add(constants.ETC_HOSTS)
3750 # Files which must either exist on all nodes or on none
3751 files_all_opt = set([
3752 constants.RAPI_USERS_FILE,
3755 # Files which should only be on master candidates
3758 files_mc.add(constants.CLUSTER_CONF_FILE)
3760 # Files which should only be on VM-capable nodes
3761 files_vm = set(filename
3762 for hv_name in cluster.enabled_hypervisors
3763 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3765 # Filenames must be unique
3766 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3767 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3768 "Found file listed in more than one file list"
3770 return (files_all, files_all_opt, files_mc, files_vm)
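# Illustrative sketch (not part of the original source): on a cluster with
# modify_etc_hosts enabled, the tuple returned above would roughly contain
#   files_all     -> known_hosts file, HMAC key, certificates, ssconf files,
#                    /etc/hosts
#   files_all_opt -> the RAPI users file (must exist on all nodes or on none)
#   files_mc      -> the cluster configuration file (master candidates only)
#   files_vm      -> hypervisor-specific ancillary files
# The exact contents depend on the enabled hypervisors and the redist flag.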
3773 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3774 """Distribute additional files which are part of the cluster configuration.
3776 ConfigWriter takes care of distributing the config and ssconf files, but
3777 there are more files which should be distributed to all nodes. This function
3778 makes sure those are copied.
3780 @param lu: calling logical unit
3781 @param additional_nodes: list of nodes not in the config to distribute to
3782 @type additional_vm: boolean
3783 @param additional_vm: whether the additional nodes are vm-capable or not
3786 # Gather target nodes
3787 cluster = lu.cfg.GetClusterInfo()
3788 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3790 online_nodes = lu.cfg.GetOnlineNodeList()
3791 vm_nodes = lu.cfg.GetVmCapableNodeList()
3793 if additional_nodes is not None:
3794 online_nodes.extend(additional_nodes)
3796 vm_nodes.extend(additional_nodes)
3798 # Never distribute to master node
3799 for nodelist in [online_nodes, vm_nodes]:
3800 if master_info.name in nodelist:
3801 nodelist.remove(master_info.name)
3804 (files_all, files_all_opt, files_mc, files_vm) = \
3805 _ComputeAncillaryFiles(cluster, True)
3807 # Never re-distribute configuration file from here
3808 assert not (constants.CLUSTER_CONF_FILE in files_all or
3809 constants.CLUSTER_CONF_FILE in files_vm)
3810 assert not files_mc, "Master candidates not handled in this function"
3813 (online_nodes, files_all),
3814 (online_nodes, files_all_opt),
3815 (vm_nodes, files_vm),
3819 for (node_list, files) in filemap:
3821 _UploadHelper(lu, node_list, fname)
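# Typical call sites: an LU that changed cluster-wide files simply runs
# "_RedistributeAncillaryFiles(self)" (see LUClusterRedistConf below), while
# node addition passes the new node explicitly, e.g.
# "_RedistributeAncillaryFiles(self, additional_nodes=[node], additional_vm=...)"
# (see LUNodeAdd.Exec), because the node is not yet part of the configuration.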
3824 class LUClusterRedistConf(NoHooksLU):
3825 """Force the redistribution of cluster configuration.
3827 This is a very simple LU.
3832 def ExpandNames(self):
3833 self.needed_locks = {
3834 locking.LEVEL_NODE: locking.ALL_SET,
3836 self.share_locks[locking.LEVEL_NODE] = 1
3838 def Exec(self, feedback_fn):
3839 """Redistribute the configuration.
3842 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3843 _RedistributeAncillaryFiles(self)
3846 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3847 """Sleep and poll for an instance's disk to sync.
3850 if not instance.disks or disks is not None and not disks:
3853 disks = _ExpandCheckDisks(instance, disks)
3856 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3858 node = instance.primary_node
3861 lu.cfg.SetDiskID(dev, node)
3863 # TODO: Convert to utils.Retry
3866 degr_retries = 10 # in seconds, as we sleep 1 second each time
3870 cumul_degraded = False
3871 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3872 msg = rstats.fail_msg
3874 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3877 raise errors.RemoteError("Can't contact node %s for mirror data,"
3878 " aborting." % node)
3881 rstats = rstats.payload
3883 for i, mstat in enumerate(rstats):
3885 lu.LogWarning("Can't compute data for node %s/%s",
3886 node, disks[i].iv_name)
3889 cumul_degraded = (cumul_degraded or
3890 (mstat.is_degraded and mstat.sync_percent is None))
3891 if mstat.sync_percent is not None:
3893 if mstat.estimated_time is not None:
3894 rem_time = ("%s remaining (estimated)" %
3895 utils.FormatSeconds(mstat.estimated_time))
3896 max_time = mstat.estimated_time
3898 rem_time = "no time estimate"
3899 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3900 (disks[i].iv_name, mstat.sync_percent, rem_time))
3902 # if we're done but degraded, let's do a few small retries, to
3903 # make sure we see a stable and not transient situation; therefore
3904 # we force restart of the loop
3905 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3906 logging.info("Degraded disks found, %d retries left", degr_retries)
3914 time.sleep(min(60, max_time))
3917 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3918 return not cumul_degraded
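# Usage sketch (an assumption mirroring callers elsewhere in this module):
#   if not _WaitForSync(lu, instance):
#     raise errors.OpExecError("Instance disks are degraded")
# i.e. a False return value means at least one disk was still degraded after
# the retries above.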
3921 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3922 """Check that mirrors are not degraded.
3924 The ldisk parameter, if True, will change the test from the
3925 is_degraded attribute (which represents overall non-ok status for
3926 the device(s)) to the ldisk (representing the local storage status).
3929 lu.cfg.SetDiskID(dev, node)
3933 if on_primary or dev.AssembleOnSecondary():
3934 rstats = lu.rpc.call_blockdev_find(node, dev)
3935 msg = rstats.fail_msg
3937 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3939 elif not rstats.payload:
3940 lu.LogWarning("Can't find disk on node %s", node)
3944 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3946 result = result and not rstats.payload.is_degraded
3949 for child in dev.children:
3950 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3955 class LUOobCommand(NoHooksLU):
3956 """Logical unit for OOB handling.
3960 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3962 def ExpandNames(self):
3963 """Gather locks we need.
3966 if self.op.node_names:
3967 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3968 lock_names = self.op.node_names
3970 lock_names = locking.ALL_SET
3972 self.needed_locks = {
3973 locking.LEVEL_NODE: lock_names,
3976 def CheckPrereq(self):
3977 """Check prerequisites.
3980 - the node exists in the configuration
3983 Any errors are signaled by raising errors.OpPrereqError.
3987 self.master_node = self.cfg.GetMasterNode()
3989 assert self.op.power_delay >= 0.0
3991 if self.op.node_names:
3992 if (self.op.command in self._SKIP_MASTER and
3993 self.master_node in self.op.node_names):
3994 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3995 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3997 if master_oob_handler:
3998 additional_text = ("run '%s %s %s' if you want to operate on the"
3999 " master regardless") % (master_oob_handler,
4003 additional_text = "it does not support out-of-band operations"
4005 raise errors.OpPrereqError(("Operating on the master node %s is not"
4006 " allowed for %s; %s") %
4007 (self.master_node, self.op.command,
4008 additional_text), errors.ECODE_INVAL)
4010 self.op.node_names = self.cfg.GetNodeList()
4011 if self.op.command in self._SKIP_MASTER:
4012 self.op.node_names.remove(self.master_node)
4014 if self.op.command in self._SKIP_MASTER:
4015 assert self.master_node not in self.op.node_names
4017 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4019 raise errors.OpPrereqError("Node %s not found" % node_name,
4022 self.nodes.append(node)
4024 if (not self.op.ignore_status and
4025 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4026 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4027 " not marked offline") % node_name,
4030 def Exec(self, feedback_fn):
4031 """Execute OOB and return result if we expect any.
4034 master_node = self.master_node
4037 for idx, node in enumerate(utils.NiceSort(self.nodes,
4038 key=lambda node: node.name)):
4039 node_entry = [(constants.RS_NORMAL, node.name)]
4040 ret.append(node_entry)
4042 oob_program = _SupportsOob(self.cfg, node)
4045 node_entry.append((constants.RS_UNAVAIL, None))
4048 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4049 self.op.command, oob_program, node.name)
4050 result = self.rpc.call_run_oob(master_node, oob_program,
4051 self.op.command, node.name,
4055 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4056 node.name, result.fail_msg)
4057 node_entry.append((constants.RS_NODATA, None))
4060 self._CheckPayload(result)
4061 except errors.OpExecError, err:
4062 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4064 node_entry.append((constants.RS_NODATA, None))
4066 if self.op.command == constants.OOB_HEALTH:
4067 # For health we should log important events
4068 for item, status in result.payload:
4069 if status in [constants.OOB_STATUS_WARNING,
4070 constants.OOB_STATUS_CRITICAL]:
4071 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4072 item, node.name, status)
4074 if self.op.command == constants.OOB_POWER_ON:
4076 elif self.op.command == constants.OOB_POWER_OFF:
4077 node.powered = False
4078 elif self.op.command == constants.OOB_POWER_STATUS:
4079 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4080 if powered != node.powered:
4081 logging.warning(("Recorded power state (%s) of node '%s' does not"
4082 " match actual power state (%s)"), node.powered,
4085 # For configuration changing commands we should update the node
4086 if self.op.command in (constants.OOB_POWER_ON,
4087 constants.OOB_POWER_OFF):
4088 self.cfg.Update(node, feedback_fn)
4090 node_entry.append((constants.RS_NORMAL, result.payload))
4092 if (self.op.command == constants.OOB_POWER_ON and
4093 idx < len(self.nodes) - 1):
4094 time.sleep(self.op.power_delay)
4098 def _CheckPayload(self, result):
4099 """Checks if the payload is valid.
4101 @param result: RPC result
4102 @raises errors.OpExecError: If payload is not valid
4106 if self.op.command == constants.OOB_HEALTH:
4107 if not isinstance(result.payload, list):
4108 errs.append("command 'health' is expected to return a list but got %s" %
4109 type(result.payload))
4111 for item, status in result.payload:
4112 if status not in constants.OOB_STATUSES:
4113 errs.append("health item '%s' has invalid status '%s'" %
4116 if self.op.command == constants.OOB_POWER_STATUS:
4117 if not isinstance(result.payload, dict):
4118 errs.append("power-status is expected to return a dict but got %s" %
4119 type(result.payload))
4121 if self.op.command in [
4122 constants.OOB_POWER_ON,
4123 constants.OOB_POWER_OFF,
4124 constants.OOB_POWER_CYCLE,
4126 if result.payload is not None:
4127 errs.append("%s is expected to not return payload but got '%s'" %
4128 (self.op.command, result.payload))
4131 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4132 utils.CommaJoin(errs))
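# Summary of the payload contract enforced above: "health" must return a list
# of (item, status) pairs with statuses from constants.OOB_STATUSES,
# "power-status" must return a dict containing OOB_POWER_STATUS_POWERED, and
# the power-on/off/cycle commands must not return any payload at all.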
4135 class _OsQuery(_QueryBase):
4136 FIELDS = query.OS_FIELDS
4138 def ExpandNames(self, lu):
4139 # Lock all nodes in shared mode
4140 # Temporary removal of locks, should be reverted later
4141 # TODO: reintroduce locks when they are lighter-weight
4142 lu.needed_locks = {}
4143 #self.share_locks[locking.LEVEL_NODE] = 1
4144 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4146 # The following variables interact with _QueryBase._GetNames
4148 self.wanted = self.names
4150 self.wanted = locking.ALL_SET
4152 self.do_locking = self.use_locking
4154 def DeclareLocks(self, lu, level):
4158 def _DiagnoseByOS(rlist):
4159 """Remaps a per-node return list into a per-OS, per-node dictionary
4161 @param rlist: a map with node names as keys and OS objects as values
4164 @return: a dictionary with osnames as keys and as value another
4165 map, with nodes as keys and tuples of (path, status, diagnose,
4166 variants, parameters, api_versions) as values, e.g.::
4168 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4169 (/srv/..., False, "invalid api")],
4170 "node2": [(/srv/..., True, "", [], [])]}
4175 # we build here the list of nodes that didn't fail the RPC (at RPC
4176 # level), so that nodes with a non-responding node daemon don't
4177 # make all OSes invalid
4178 good_nodes = [node_name for node_name in rlist
4179 if not rlist[node_name].fail_msg]
4180 for node_name, nr in rlist.items():
4181 if nr.fail_msg or not nr.payload:
4183 for (name, path, status, diagnose, variants,
4184 params, api_versions) in nr.payload:
4185 if name not in all_os:
4186 # build a list of nodes for this os containing empty lists
4187 # for each node in node_list
4189 for nname in good_nodes:
4190 all_os[name][nname] = []
4191 # convert params from [name, help] to (name, help)
4192 params = [tuple(v) for v in params]
4193 all_os[name][node_name].append((path, status, diagnose,
4194 variants, params, api_versions))
4197 def _GetQueryData(self, lu):
4198 """Computes the list of nodes and their attributes.
4201 # Locking is not used
4202 assert not (compat.any(lu.glm.is_owned(level)
4203 for level in locking.LEVELS
4204 if level != locking.LEVEL_CLUSTER) or
4205 self.do_locking or self.use_locking)
4207 valid_nodes = [node.name
4208 for node in lu.cfg.GetAllNodesInfo().values()
4209 if not node.offline and node.vm_capable]
4210 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4211 cluster = lu.cfg.GetClusterInfo()
4215 for (os_name, os_data) in pol.items():
4216 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4217 hidden=(os_name in cluster.hidden_os),
4218 blacklisted=(os_name in cluster.blacklisted_os))
4222 api_versions = set()
4224 for idx, osl in enumerate(os_data.values()):
4225 info.valid = bool(info.valid and osl and osl[0][1])
4229 (node_variants, node_params, node_api) = osl[0][3:6]
4232 variants.update(node_variants)
4233 parameters.update(node_params)
4234 api_versions.update(node_api)
4236 # Filter out inconsistent values
4237 variants.intersection_update(node_variants)
4238 parameters.intersection_update(node_params)
4239 api_versions.intersection_update(node_api)
4241 info.variants = list(variants)
4242 info.parameters = list(parameters)
4243 info.api_versions = list(api_versions)
4245 data[os_name] = info
4247 # Prepare data in requested order
4248 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4252 class LUOsDiagnose(NoHooksLU):
4253 """Logical unit for OS diagnose/query.
4259 def _BuildFilter(fields, names):
4260 """Builds a filter for querying OSes.
4263 name_filter = qlang.MakeSimpleFilter("name", names)
4265 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4266 # respective field is not requested
4267 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4268 for fname in ["hidden", "blacklisted"]
4269 if fname not in fields]
4270 if "valid" not in fields:
4271 status_filter.append([qlang.OP_TRUE, "valid"])
4274 status_filter.insert(0, qlang.OP_AND)
4276 status_filter = None
4278 if name_filter and status_filter:
4279 return [qlang.OP_AND, name_filter, status_filter]
4283 return status_filter
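# Illustrative example (not from the original source): with
# fields=["name", "variants"] and names=["debian-etch"], the filter built
# above would look roughly like
#   [qlang.OP_AND,
#    [qlang.OP_OR, [qlang.OP_EQUAL, "name", "debian-etch"]],
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#     [qlang.OP_TRUE, "valid"]]]
# so hidden, blacklisted and invalid OSes are excluded unless those fields
# were explicitly requested.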
4285 def CheckArguments(self):
4286 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4287 self.op.output_fields, False)
4289 def ExpandNames(self):
4290 self.oq.ExpandNames(self)
4292 def Exec(self, feedback_fn):
4293 return self.oq.OldStyleQuery(self)
4296 class LUNodeRemove(LogicalUnit):
4297 """Logical unit for removing a node.
4300 HPATH = "node-remove"
4301 HTYPE = constants.HTYPE_NODE
4303 def BuildHooksEnv(self):
4306 This doesn't run on the target node in the pre phase as a failed
4307 node would then be impossible to remove.
4311 "OP_TARGET": self.op.node_name,
4312 "NODE_NAME": self.op.node_name,
4315 def BuildHooksNodes(self):
4316 """Build hooks nodes.
4319 all_nodes = self.cfg.GetNodeList()
4321 all_nodes.remove(self.op.node_name)
4323 logging.warning("Node '%s', which is about to be removed, was not found"
4324 " in the list of all nodes", self.op.node_name)
4325 return (all_nodes, all_nodes)
4327 def CheckPrereq(self):
4328 """Check prerequisites.
4331 - the node exists in the configuration
4332 - it does not have primary or secondary instances
4333 - it's not the master
4335 Any errors are signaled by raising errors.OpPrereqError.
4338 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4339 node = self.cfg.GetNodeInfo(self.op.node_name)
4340 assert node is not None
4342 masternode = self.cfg.GetMasterNode()
4343 if node.name == masternode:
4344 raise errors.OpPrereqError("Node is the master node, failover to another"
4345 " node is required", errors.ECODE_INVAL)
4347 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4348 if node.name in instance.all_nodes:
4349 raise errors.OpPrereqError("Instance %s is still running on the node,"
4350 " please remove first" % instance_name,
4352 self.op.node_name = node.name
4355 def Exec(self, feedback_fn):
4356 """Removes the node from the cluster.
4360 logging.info("Stopping the node daemon and removing configs from node %s",
4363 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4365 # Promote nodes to master candidate as needed
4366 _AdjustCandidatePool(self, exceptions=[node.name])
4367 self.context.RemoveNode(node.name)
4369 # Run post hooks on the node before it's removed
4370 _RunPostHook(self, node.name)
4372 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4373 msg = result.fail_msg
4375 self.LogWarning("Errors encountered on the remote node while leaving"
4376 " the cluster: %s", msg)
4378 # Remove node from our /etc/hosts
4379 if self.cfg.GetClusterInfo().modify_etc_hosts:
4380 master_node = self.cfg.GetMasterNode()
4381 result = self.rpc.call_etc_hosts_modify(master_node,
4382 constants.ETC_HOSTS_REMOVE,
4384 result.Raise("Can't update hosts file with new host data")
4385 _RedistributeAncillaryFiles(self)
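# Note on ordering (summary of the steps above): the candidate pool is
# adjusted and the node removed from the configuration *before* the
# node_leave_cluster RPC, so a node that fails to clean itself up is still
# gone from the cluster's point of view; /etc/hosts and the ancillary files
# are refreshed last.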
4388 class _NodeQuery(_QueryBase):
4389 FIELDS = query.NODE_FIELDS
4391 def ExpandNames(self, lu):
4392 lu.needed_locks = {}
4393 lu.share_locks = _ShareAll()
4396 self.wanted = _GetWantedNodes(lu, self.names)
4398 self.wanted = locking.ALL_SET
4400 self.do_locking = (self.use_locking and
4401 query.NQ_LIVE in self.requested_data)
4404 # If any non-static field is requested we need to lock the nodes
4405 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4407 def DeclareLocks(self, lu, level):
4410 def _GetQueryData(self, lu):
4411 """Computes the list of nodes and their attributes.
4414 all_info = lu.cfg.GetAllNodesInfo()
4416 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4418 # Gather data as requested
4419 if query.NQ_LIVE in self.requested_data:
4420 # filter out non-vm_capable nodes
4421 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4423 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4424 lu.cfg.GetHypervisorType())
4425 live_data = dict((name, nresult.payload)
4426 for (name, nresult) in node_data.items()
4427 if not nresult.fail_msg and nresult.payload)
4431 if query.NQ_INST in self.requested_data:
4432 node_to_primary = dict([(name, set()) for name in nodenames])
4433 node_to_secondary = dict([(name, set()) for name in nodenames])
4435 inst_data = lu.cfg.GetAllInstancesInfo()
4437 for inst in inst_data.values():
4438 if inst.primary_node in node_to_primary:
4439 node_to_primary[inst.primary_node].add(inst.name)
4440 for secnode in inst.secondary_nodes:
4441 if secnode in node_to_secondary:
4442 node_to_secondary[secnode].add(inst.name)
4444 node_to_primary = None
4445 node_to_secondary = None
4447 if query.NQ_OOB in self.requested_data:
4448 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4449 for name, node in all_info.iteritems())
4453 if query.NQ_GROUP in self.requested_data:
4454 groups = lu.cfg.GetAllNodeGroupsInfo()
4458 return query.NodeQueryData([all_info[name] for name in nodenames],
4459 live_data, lu.cfg.GetMasterNode(),
4460 node_to_primary, node_to_secondary, groups,
4461 oob_support, lu.cfg.GetClusterInfo())
4464 class LUNodeQuery(NoHooksLU):
4465 """Logical unit for querying nodes.
4468 # pylint: disable=W0142
4471 def CheckArguments(self):
4472 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4473 self.op.output_fields, self.op.use_locking)
4475 def ExpandNames(self):
4476 self.nq.ExpandNames(self)
4478 def Exec(self, feedback_fn):
4479 return self.nq.OldStyleQuery(self)
4482 class LUNodeQueryvols(NoHooksLU):
4483 """Logical unit for getting volumes on node(s).
4487 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4488 _FIELDS_STATIC = utils.FieldSet("node")
4490 def CheckArguments(self):
4491 _CheckOutputFields(static=self._FIELDS_STATIC,
4492 dynamic=self._FIELDS_DYNAMIC,
4493 selected=self.op.output_fields)
4495 def ExpandNames(self):
4496 self.needed_locks = {}
4497 self.share_locks[locking.LEVEL_NODE] = 1
4498 if not self.op.nodes:
4499 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4501 self.needed_locks[locking.LEVEL_NODE] = \
4502 _GetWantedNodes(self, self.op.nodes)
4504 def Exec(self, feedback_fn):
4505 """Computes the list of nodes and their attributes.
4508 nodenames = self.owned_locks(locking.LEVEL_NODE)
4509 volumes = self.rpc.call_node_volumes(nodenames)
4511 ilist = self.cfg.GetAllInstancesInfo()
4512 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4515 for node in nodenames:
4516 nresult = volumes[node]
4519 msg = nresult.fail_msg
4521 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4524 node_vols = sorted(nresult.payload,
4525 key=operator.itemgetter("dev"))
4527 for vol in node_vols:
4529 for field in self.op.output_fields:
4532 elif field == "phys":
4536 elif field == "name":
4538 elif field == "size":
4539 val = int(float(vol["size"]))
4540 elif field == "instance":
4541 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4543 raise errors.ParameterError(field)
4544 node_output.append(str(val))
4546 output.append(node_output)
4551 class LUNodeQueryStorage(NoHooksLU):
4552 """Logical unit for getting information on storage units on node(s).
4555 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4558 def CheckArguments(self):
4559 _CheckOutputFields(static=self._FIELDS_STATIC,
4560 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4561 selected=self.op.output_fields)
4563 def ExpandNames(self):
4564 self.needed_locks = {}
4565 self.share_locks[locking.LEVEL_NODE] = 1
4568 self.needed_locks[locking.LEVEL_NODE] = \
4569 _GetWantedNodes(self, self.op.nodes)
4571 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4573 def Exec(self, feedback_fn):
4574 """Computes the list of nodes and their attributes.
4577 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4579 # Always get name to sort by
4580 if constants.SF_NAME in self.op.output_fields:
4581 fields = self.op.output_fields[:]
4583 fields = [constants.SF_NAME] + self.op.output_fields
4585 # Never ask for node or type as it's only known to the LU
4586 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4587 while extra in fields:
4588 fields.remove(extra)
4590 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4591 name_idx = field_idx[constants.SF_NAME]
4593 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4594 data = self.rpc.call_storage_list(self.nodes,
4595 self.op.storage_type, st_args,
4596 self.op.name, fields)
4600 for node in utils.NiceSort(self.nodes):
4601 nresult = data[node]
4605 msg = nresult.fail_msg
4607 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4610 rows = dict([(row[name_idx], row) for row in nresult.payload])
4612 for name in utils.NiceSort(rows.keys()):
4617 for field in self.op.output_fields:
4618 if field == constants.SF_NODE:
4620 elif field == constants.SF_TYPE:
4621 val = self.op.storage_type
4622 elif field in field_idx:
4623 val = row[field_idx[field]]
4625 raise errors.ParameterError(field)
4634 class _InstanceQuery(_QueryBase):
4635 FIELDS = query.INSTANCE_FIELDS
4637 def ExpandNames(self, lu):
4638 lu.needed_locks = {}
4639 lu.share_locks = _ShareAll()
4642 self.wanted = _GetWantedInstances(lu, self.names)
4644 self.wanted = locking.ALL_SET
4646 self.do_locking = (self.use_locking and
4647 query.IQ_LIVE in self.requested_data)
4649 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4650 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4651 lu.needed_locks[locking.LEVEL_NODE] = []
4652 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4654 self.do_grouplocks = (self.do_locking and
4655 query.IQ_NODES in self.requested_data)
4657 def DeclareLocks(self, lu, level):
4659 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4660 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4662 # Lock all groups used by instances optimistically; this requires going
4663 # via the node before it's locked, requiring verification later on
4664 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4666 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4667 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4668 elif level == locking.LEVEL_NODE:
4669 lu._LockInstancesNodes() # pylint: disable=W0212
4672 def _CheckGroupLocks(lu):
4673 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4674 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4676 # Check if node groups for locked instances are still correct
4677 for instance_name in owned_instances:
4678 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4680 def _GetQueryData(self, lu):
4681 """Computes the list of instances and their attributes.
4684 if self.do_grouplocks:
4685 self._CheckGroupLocks(lu)
4687 cluster = lu.cfg.GetClusterInfo()
4688 all_info = lu.cfg.GetAllInstancesInfo()
4690 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4692 instance_list = [all_info[name] for name in instance_names]
4693 nodes = frozenset(itertools.chain(*(inst.all_nodes
4694 for inst in instance_list)))
4695 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4698 wrongnode_inst = set()
4700 # Gather data as requested
4701 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4703 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4705 result = node_data[name]
4707 # offline nodes will be in both lists
4708 assert result.fail_msg
4709 offline_nodes.append(name)
4711 bad_nodes.append(name)
4712 elif result.payload:
4713 for inst in result.payload:
4714 if inst in all_info:
4715 if all_info[inst].primary_node == name:
4716 live_data.update(result.payload)
4718 wrongnode_inst.add(inst)
4720 # orphan instance; we don't list it here as we don't
4721 # handle this case yet in the output of instance listing
4722 logging.warning("Orphan instance '%s' found on node %s",
4724 # else no instance is alive
4728 if query.IQ_DISKUSAGE in self.requested_data:
4729 disk_usage = dict((inst.name,
4730 _ComputeDiskSize(inst.disk_template,
4731 [{constants.IDISK_SIZE: disk.size}
4732 for disk in inst.disks]))
4733 for inst in instance_list)
4737 if query.IQ_CONSOLE in self.requested_data:
4739 for inst in instance_list:
4740 if inst.name in live_data:
4741 # Instance is running
4742 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4744 consinfo[inst.name] = None
4745 assert set(consinfo.keys()) == set(instance_names)
4749 if query.IQ_NODES in self.requested_data:
4750 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4752 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4753 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4754 for uuid in set(map(operator.attrgetter("group"),
4760 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4761 disk_usage, offline_nodes, bad_nodes,
4762 live_data, wrongnode_inst, consinfo,
4766 class LUQuery(NoHooksLU):
4767 """Query for resources/items of a certain kind.
4770 # pylint: disable=W0142
4773 def CheckArguments(self):
4774 qcls = _GetQueryImplementation(self.op.what)
4776 self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)
4778 def ExpandNames(self):
4779 self.impl.ExpandNames(self)
4781 def DeclareLocks(self, level):
4782 self.impl.DeclareLocks(self, level)
4784 def Exec(self, feedback_fn):
4785 return self.impl.NewStyleQuery(self)
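# Usage sketch (an assumption, not from the original source): a generic query
# for node names and roles could be submitted as an opcode along the lines of
#   opcodes.OpQuery(what=constants.QR_NODE, fields=["name", "role"],
#                   filter=None)
# which is dispatched here via _GetQueryImplementation to the matching
# _QueryBase subclass (e.g. _NodeQuery above).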
4788 class LUQueryFields(NoHooksLU):
4789 """Query for resources/items of a certain kind.
4792 # pylint: disable=W0142
4795 def CheckArguments(self):
4796 self.qcls = _GetQueryImplementation(self.op.what)
4798 def ExpandNames(self):
4799 self.needed_locks = {}
4801 def Exec(self, feedback_fn):
4802 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4805 class LUNodeModifyStorage(NoHooksLU):
4806 """Logical unit for modifying a storage volume on a node.
4811 def CheckArguments(self):
4812 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4814 storage_type = self.op.storage_type
4817 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4819 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4820 " modified" % storage_type,
4823 diff = set(self.op.changes.keys()) - modifiable
4825 raise errors.OpPrereqError("The following fields can not be modified for"
4826 " storage units of type '%s': %r" %
4827 (storage_type, list(diff)),
4830 def ExpandNames(self):
4831 self.needed_locks = {
4832 locking.LEVEL_NODE: self.op.node_name,
4835 def Exec(self, feedback_fn):
4836 """Modifies the storage unit on the given node.
4839 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4840 result = self.rpc.call_storage_modify(self.op.node_name,
4841 self.op.storage_type, st_args,
4842 self.op.name, self.op.changes)
4843 result.Raise("Failed to modify storage unit '%s' on %s" %
4844 (self.op.name, self.op.node_name))
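# Illustrative request (an assumption, not from the original source): marking
# an LVM physical volume as unallocatable might be expressed as
#   opcodes.OpNodeModifyStorage(node_name="node1.example.com",
#                               storage_type=constants.ST_LVM_PV,
#                               name="/dev/sdb1",
#                               changes={constants.SF_ALLOCATABLE: False})
# CheckArguments above only accepts fields listed in
# constants.MODIFIABLE_STORAGE_FIELDS for the given storage type.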
4847 class LUNodeAdd(LogicalUnit):
4848 """Logical unit for adding node to the cluster.
4852 HTYPE = constants.HTYPE_NODE
4853 _NFLAGS = ["master_capable", "vm_capable"]
4855 def CheckArguments(self):
4856 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4857 # validate/normalize the node name
4858 self.hostname = netutils.GetHostname(name=self.op.node_name,
4859 family=self.primary_ip_family)
4860 self.op.node_name = self.hostname.name
4862 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4863 raise errors.OpPrereqError("Cannot readd the master node",
4866 if self.op.readd and self.op.group:
4867 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4868 " being readded", errors.ECODE_INVAL)
4870 def BuildHooksEnv(self):
4873 This will run on all nodes before, and on all nodes + the new node after.
4877 "OP_TARGET": self.op.node_name,
4878 "NODE_NAME": self.op.node_name,
4879 "NODE_PIP": self.op.primary_ip,
4880 "NODE_SIP": self.op.secondary_ip,
4881 "MASTER_CAPABLE": str(self.op.master_capable),
4882 "VM_CAPABLE": str(self.op.vm_capable),
4885 def BuildHooksNodes(self):
4886 """Build hooks nodes.
4889 # Exclude added node
4890 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4891 post_nodes = pre_nodes + [self.op.node_name, ]
4893 return (pre_nodes, post_nodes)
4895 def CheckPrereq(self):
4896 """Check prerequisites.
4899 - the new node is not already in the config
4901 - its parameters (single/dual homed) matches the cluster
4903 Any errors are signaled by raising errors.OpPrereqError.
4907 hostname = self.hostname
4908 node = hostname.name
4909 primary_ip = self.op.primary_ip = hostname.ip
4910 if self.op.secondary_ip is None:
4911 if self.primary_ip_family == netutils.IP6Address.family:
4912 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
4913 " IPv4 address must be given as secondary",
4915 self.op.secondary_ip = primary_ip
4917 secondary_ip = self.op.secondary_ip
4918 if not netutils.IP4Address.IsValid(secondary_ip):
4919 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4920 " address" % secondary_ip, errors.ECODE_INVAL)
4922 node_list = cfg.GetNodeList()
4923 if not self.op.readd and node in node_list:
4924 raise errors.OpPrereqError("Node %s is already in the configuration" %
4925 node, errors.ECODE_EXISTS)
4926 elif self.op.readd and node not in node_list:
4927 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4930 self.changed_primary_ip = False
4932 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4933 if self.op.readd and node == existing_node_name:
4934 if existing_node.secondary_ip != secondary_ip:
4935 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4936 " address configuration as before",
4938 if existing_node.primary_ip != primary_ip:
4939 self.changed_primary_ip = True
4943 if (existing_node.primary_ip == primary_ip or
4944 existing_node.secondary_ip == primary_ip or
4945 existing_node.primary_ip == secondary_ip or
4946 existing_node.secondary_ip == secondary_ip):
4947 raise errors.OpPrereqError("New node ip address(es) conflict with"
4948 " existing node %s" % existing_node.name,
4949 errors.ECODE_NOTUNIQUE)
4951 # After this 'if' block, None is no longer a valid value for the
4952 # _capable op attributes
4954 old_node = self.cfg.GetNodeInfo(node)
4955 assert old_node is not None, "Can't retrieve locked node %s" % node
4956 for attr in self._NFLAGS:
4957 if getattr(self.op, attr) is None:
4958 setattr(self.op, attr, getattr(old_node, attr))
4960 for attr in self._NFLAGS:
4961 if getattr(self.op, attr) is None:
4962 setattr(self.op, attr, True)
4964 if self.op.readd and not self.op.vm_capable:
4965 pri, sec = cfg.GetNodeInstances(node)
4967 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4968 " flag set to false, but it already holds"
4969 " instances" % node,
4972 # check that the type of the node (single versus dual homed) is the
4973 # same as for the master
4974 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4975 master_singlehomed = myself.secondary_ip == myself.primary_ip
4976 newbie_singlehomed = secondary_ip == primary_ip
4977 if master_singlehomed != newbie_singlehomed:
4978 if master_singlehomed:
4979 raise errors.OpPrereqError("The master has no secondary ip but the"
4980 " new node has one",
4983 raise errors.OpPrereqError("The master has a secondary ip but the"
4984 " new node doesn't have one",
4987 # checks reachability
4988 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4989 raise errors.OpPrereqError("Node not reachable by ping",
4990 errors.ECODE_ENVIRON)
4992 if not newbie_singlehomed:
4993 # check reachability from my secondary ip to newbie's secondary ip
4994 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4995 source=myself.secondary_ip):
4996 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4997 " based ping to node daemon port",
4998 errors.ECODE_ENVIRON)
5005 if self.op.master_capable:
5006 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5008 self.master_candidate = False
5011 self.new_node = old_node
5013 node_group = cfg.LookupNodeGroup(self.op.group)
5014 self.new_node = objects.Node(name=node,
5015 primary_ip=primary_ip,
5016 secondary_ip=secondary_ip,
5017 master_candidate=self.master_candidate,
5018 offline=False, drained=False,
5021 if self.op.ndparams:
5022 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5024 def Exec(self, feedback_fn):
5025 """Adds the new node to the cluster.
5028 new_node = self.new_node
5029 node = new_node.name
5031 # We are adding a new node, so we assume it's powered
5032 new_node.powered = True
5034 # for re-adds, reset the offline/drained/master-candidate flags;
5035 # we need to reset here, otherwise offline would prevent RPC calls
5036 # later in the procedure; this also means that if the re-add
5037 # fails, we are left with a non-offlined, broken node
5039 new_node.drained = new_node.offline = False # pylint: disable=W0201
5040 self.LogInfo("Readding a node, the offline/drained flags were reset")
5041 # if we demote the node, we do cleanup later in the procedure
5042 new_node.master_candidate = self.master_candidate
5043 if self.changed_primary_ip:
5044 new_node.primary_ip = self.op.primary_ip
5046 # copy the master/vm_capable flags
5047 for attr in self._NFLAGS:
5048 setattr(new_node, attr, getattr(self.op, attr))
5050 # notify the user about any possible mc promotion
5051 if new_node.master_candidate:
5052 self.LogInfo("Node will be a master candidate")
5054 if self.op.ndparams:
5055 new_node.ndparams = self.op.ndparams
5057 new_node.ndparams = {}
5059 # check connectivity
5060 result = self.rpc.call_version([node])[node]
5061 result.Raise("Can't get version information from node %s" % node)
5062 if constants.PROTOCOL_VERSION == result.payload:
5063 logging.info("Communication to node %s fine, sw version %s match",
5064 node, result.payload)
5066 raise errors.OpExecError("Version mismatch master version %s,"
5067 " node version %s" %
5068 (constants.PROTOCOL_VERSION, result.payload))
5070 # Add node to our /etc/hosts, and add key to known_hosts
5071 if self.cfg.GetClusterInfo().modify_etc_hosts:
5072 master_node = self.cfg.GetMasterNode()
5073 result = self.rpc.call_etc_hosts_modify(master_node,
5074 constants.ETC_HOSTS_ADD,
5077 result.Raise("Can't update hosts file with new host data")
5079 if new_node.secondary_ip != new_node.primary_ip:
5080 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5083 node_verify_list = [self.cfg.GetMasterNode()]
5084 node_verify_param = {
5085 constants.NV_NODELIST: ([node], {}),
5086 # TODO: do a node-net-test as well?
5089 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5090 self.cfg.GetClusterName())
5091 for verifier in node_verify_list:
5092 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5093 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5095 for failed in nl_payload:
5096 feedback_fn("ssh/hostname verification failed"
5097 " (checking from %s): %s" %
5098 (verifier, nl_payload[failed]))
5099 raise errors.OpExecError("ssh/hostname verification failed")
5102 _RedistributeAncillaryFiles(self)
5103 self.context.ReaddNode(new_node)
5104 # make sure we redistribute the config
5105 self.cfg.Update(new_node, feedback_fn)
5106 # and make sure the new node will not have old files around
5107 if not new_node.master_candidate:
5108 result = self.rpc.call_node_demote_from_mc(new_node.name)
5109 msg = result.fail_msg
5111 self.LogWarning("Node failed to demote itself from master"
5112 " candidate status: %s" % msg)
5114 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5115 additional_vm=self.op.vm_capable)
5116 self.context.AddNode(new_node, self.proc.GetECId())
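# Summary of the above: a re-added node reuses its existing Node object and is
# re-announced via ReaddNode, while a brand-new node is registered through
# AddNode under the current execution context id; in both cases the ancillary
# files are pushed to the node beforehand.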
5119 class LUNodeSetParams(LogicalUnit):
5120 """Modifies the parameters of a node.
5122 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5123 to the node role (as _ROLE_*)
5124 @cvar _R2F: a dictionary from node role to tuples of flags
5125 @cvar _FLAGS: a list of attribute names corresponding to the flags
5128 HPATH = "node-modify"
5129 HTYPE = constants.HTYPE_NODE
5131 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5133 (True, False, False): _ROLE_CANDIDATE,
5134 (False, True, False): _ROLE_DRAINED,
5135 (False, False, True): _ROLE_OFFLINE,
5136 (False, False, False): _ROLE_REGULAR,
5138 _R2F = dict((v, k) for k, v in _F2R.items())
5139 _FLAGS = ["master_candidate", "drained", "offline"]
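# For illustration: a node whose (master_candidate, drained, offline) flags
# are (True, False, False) maps to _ROLE_CANDIDATE via _F2R, and
# _R2F[_ROLE_CANDIDATE] yields the flag tuple back; CheckPrereq and Exec below
# use these tables to turn individual flag changes into a single role
# transition.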
5141 def CheckArguments(self):
5142 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5143 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5144 self.op.master_capable, self.op.vm_capable,
5145 self.op.secondary_ip, self.op.ndparams]
5146 if all_mods.count(None) == len(all_mods):
5147 raise errors.OpPrereqError("Please pass at least one modification",
5149 if all_mods.count(True) > 1:
5150 raise errors.OpPrereqError("Can't set the node into more than one"
5151 " state at the same time",
5154 # Boolean value that tells us whether we might be demoting from MC
5155 self.might_demote = (self.op.master_candidate == False or
5156 self.op.offline == True or
5157 self.op.drained == True or
5158 self.op.master_capable == False)
5160 if self.op.secondary_ip:
5161 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5162 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5163 " address" % self.op.secondary_ip,
5166 self.lock_all = self.op.auto_promote and self.might_demote
5167 self.lock_instances = self.op.secondary_ip is not None
5169 def ExpandNames(self):
5171 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5173 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5175 if self.lock_instances:
5176 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5178 def DeclareLocks(self, level):
5179 # If we have locked all instances, before waiting to lock nodes, release
5180 # all the ones living on nodes unrelated to the current operation.
5181 if level == locking.LEVEL_NODE and self.lock_instances:
5182 self.affected_instances = []
5183 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5186 # Build list of instances to release
5187 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5188 for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5189 if (instance.disk_template in constants.DTS_INT_MIRROR and
5190 self.op.node_name in instance.all_nodes):
5191 instances_keep.append(instance_name)
5192 self.affected_instances.append(instance)
5194 _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5196 assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5197 set(instances_keep))
5199 def BuildHooksEnv(self):
5202 This runs on the master node.
5206 "OP_TARGET": self.op.node_name,
5207 "MASTER_CANDIDATE": str(self.op.master_candidate),
5208 "OFFLINE": str(self.op.offline),
5209 "DRAINED": str(self.op.drained),
5210 "MASTER_CAPABLE": str(self.op.master_capable),
5211 "VM_CAPABLE": str(self.op.vm_capable),
5214 def BuildHooksNodes(self):
5215 """Build hooks nodes.
5218 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5221 def CheckPrereq(self):
5222 """Check prerequisites.
5224 This only checks the instance list against the existing names.
5227 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5229 if (self.op.master_candidate is not None or
5230 self.op.drained is not None or
5231 self.op.offline is not None):
5232 # we can't change the master's node flags
5233 if self.op.node_name == self.cfg.GetMasterNode():
5234 raise errors.OpPrereqError("The master role can be changed"
5235 " only via master-failover",
5238 if self.op.master_candidate and not node.master_capable:
5239 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5240 " it a master candidate" % node.name,
5243 if self.op.vm_capable == False:
5244 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5246 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5247 " the vm_capable flag" % node.name,
5250 if node.master_candidate and self.might_demote and not self.lock_all:
5251 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5252 # check if after removing the current node, we're missing master candidates
5254 (mc_remaining, mc_should, _) = \
5255 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5256 if mc_remaining < mc_should:
5257 raise errors.OpPrereqError("Not enough master candidates, please"
5258 " pass auto promote option to allow"
5259 " promotion", errors.ECODE_STATE)
5261 self.old_flags = old_flags = (node.master_candidate,
5262 node.drained, node.offline)
5263 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5264 self.old_role = old_role = self._F2R[old_flags]
5266 # Check for ineffective changes
5267 for attr in self._FLAGS:
5268 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5269 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5270 setattr(self.op, attr, None)
5272 # Past this point, any flag change to False means a transition
5273 # away from the respective state, as only real changes are kept
5275 # TODO: We might query the real power state if it supports OOB
5276 if _SupportsOob(self.cfg, node):
5277 if self.op.offline is False and not (node.powered or
5278 self.op.powered == True):
5279 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5280 " offline status can be reset") %
5282 elif self.op.powered is not None:
5283 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5284 " as it does not support out-of-band"
5285 " handling") % self.op.node_name)
5287 # If we're being deofflined/drained, we'll MC ourself if needed
5288 if (self.op.drained == False or self.op.offline == False or
5289 (self.op.master_capable and not node.master_capable)):
5290 if _DecideSelfPromotion(self):
5291 self.op.master_candidate = True
5292 self.LogInfo("Auto-promoting node to master candidate")
5294 # If we're no longer master capable, we'll demote ourselves from MC
5295 if self.op.master_capable == False and node.master_candidate:
5296 self.LogInfo("Demoting from master candidate")
5297 self.op.master_candidate = False
5300 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5301 if self.op.master_candidate:
5302 new_role = self._ROLE_CANDIDATE
5303 elif self.op.drained:
5304 new_role = self._ROLE_DRAINED
5305 elif self.op.offline:
5306 new_role = self._ROLE_OFFLINE
5307 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5308 # False is still in new flags, which means we're un-setting (the
5310 new_role = self._ROLE_REGULAR
5311 else: # no new flags, nothing, keep old role
5314 self.new_role = new_role
5316 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5317 # Trying to transition out of offline status
5318 result = self.rpc.call_version([node.name])[node.name]
5320 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5321 " to report its version: %s" %
5322 (node.name, result.fail_msg),
5325 self.LogWarning("Transitioning node from offline to online state"
5326 " without using re-add. Please make sure the node"
5329 if self.op.secondary_ip:
5330 # Ok even without locking, because this can't be changed by any LU
5331 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5332 master_singlehomed = master.secondary_ip == master.primary_ip
5333 if master_singlehomed and self.op.secondary_ip:
5334 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5335 " homed cluster", errors.ECODE_INVAL)
5338 if self.affected_instances:
5339 raise errors.OpPrereqError("Cannot change secondary ip: offline"
5340 " node has instances (%s) configured"
5341 " to use it" % self.affected_instances)
5343 # On online nodes, check that no instances are running, and that
5344 # the node has the new ip and we can reach it.
5345 for instance in self.affected_instances:
5346 _CheckInstanceDown(self, instance, "cannot change secondary ip")
5348 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5349 if master.name != node.name:
5350 # check reachability from master secondary ip to new secondary ip
5351 if not netutils.TcpPing(self.op.secondary_ip,
5352 constants.DEFAULT_NODED_PORT,
5353 source=master.secondary_ip):
5354 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5355 " based ping to node daemon port",
5356 errors.ECODE_ENVIRON)
5358 if self.op.ndparams:
5359 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5360 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5361 self.new_ndparams = new_ndparams
5363 def Exec(self, feedback_fn):
5368 old_role = self.old_role
5369 new_role = self.new_role
5373 if self.op.ndparams:
5374 node.ndparams = self.new_ndparams
5376 if self.op.powered is not None:
5377 node.powered = self.op.powered
5379 for attr in ["master_capable", "vm_capable"]:
5380 val = getattr(self.op, attr)
5382 setattr(node, attr, val)
5383 result.append((attr, str(val)))
5385 if new_role != old_role:
5386 # Tell the node to demote itself, if no longer MC and not offline
5387 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5388 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5390 self.LogWarning("Node failed to demote itself: %s", msg)
5392 new_flags = self._R2F[new_role]
5393 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5395 result.append((desc, str(nf)))
5396 (node.master_candidate, node.drained, node.offline) = new_flags
5398 # we locked all nodes, we adjust the CP before updating this node
5400 _AdjustCandidatePool(self, [node.name])
5402 if self.op.secondary_ip:
5403 node.secondary_ip = self.op.secondary_ip
5404 result.append(("secondary_ip", self.op.secondary_ip))
5406 # this will trigger configuration file update, if needed
5407 self.cfg.Update(node, feedback_fn)
5409 # this will trigger job queue propagation or cleanup if the mc flag changed
5411 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5412 self.context.ReaddNode(node)
5417 class LUNodePowercycle(NoHooksLU):
5418 """Powercycles a node.
5423 def CheckArguments(self):
5424 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5425 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5426 raise errors.OpPrereqError("The node is the master and the force"
5427 " parameter was not set",
5430 def ExpandNames(self):
5431 """Locking for PowercycleNode.
5433 This is a last-resort option and shouldn't block on other
5434 jobs. Therefore, we grab no locks.
5437 self.needed_locks = {}
5439 def Exec(self, feedback_fn):
5443 result = self.rpc.call_node_powercycle(self.op.node_name,
5444 self.cfg.GetHypervisorType())
5445 result.Raise("Failed to schedule the reboot")
5446 return result.payload
5449 class LUClusterQuery(NoHooksLU):
5450 """Query cluster configuration.
5455 def ExpandNames(self):
5456 self.needed_locks = {}
5458 def Exec(self, feedback_fn):
5459 """Return cluster config.
5462 cluster = self.cfg.GetClusterInfo()
5465 # Filter just for enabled hypervisors
5466 for os_name, hv_dict in cluster.os_hvp.items():
5467 os_hvp[os_name] = {}
5468 for hv_name, hv_params in hv_dict.items():
5469 if hv_name in cluster.enabled_hypervisors:
5470 os_hvp[os_name][hv_name] = hv_params
5472 # Convert ip_family to ip_version
5473 primary_ip_version = constants.IP4_VERSION
5474 if cluster.primary_ip_family == netutils.IP6Address.family:
5475 primary_ip_version = constants.IP6_VERSION
5478 "software_version": constants.RELEASE_VERSION,
5479 "protocol_version": constants.PROTOCOL_VERSION,
5480 "config_version": constants.CONFIG_VERSION,
5481 "os_api_version": max(constants.OS_API_VERSIONS),
5482 "export_version": constants.EXPORT_VERSION,
5483 "architecture": (platform.architecture()[0], platform.machine()),
5484 "name": cluster.cluster_name,
5485 "master": cluster.master_node,
5486 "default_hypervisor": cluster.enabled_hypervisors[0],
5487 "enabled_hypervisors": cluster.enabled_hypervisors,
5488 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5489 for hypervisor_name in cluster.enabled_hypervisors]),
5491 "beparams": cluster.beparams,
5492 "osparams": cluster.osparams,
5493 "nicparams": cluster.nicparams,
5494 "ndparams": cluster.ndparams,
5495 "candidate_pool_size": cluster.candidate_pool_size,
5496 "master_netdev": cluster.master_netdev,
5497 "volume_group_name": cluster.volume_group_name,
5498 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5499 "file_storage_dir": cluster.file_storage_dir,
5500 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5501 "maintain_node_health": cluster.maintain_node_health,
5502 "ctime": cluster.ctime,
5503 "mtime": cluster.mtime,
5504 "uuid": cluster.uuid,
5505 "tags": list(cluster.GetTags()),
5506 "uid_pool": cluster.uid_pool,
5507 "default_iallocator": cluster.default_iallocator,
5508 "reserved_lvs": cluster.reserved_lvs,
5509 "primary_ip_version": primary_ip_version,
5510 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5511 "hidden_os": cluster.hidden_os,
5512 "blacklisted_os": cluster.blacklisted_os,
5518 class LUClusterConfigQuery(NoHooksLU):
5519 """Return configuration values.
5523 _FIELDS_DYNAMIC = utils.FieldSet()
5524 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5525 "watcher_pause", "volume_group_name")
5527 def CheckArguments(self):
5528 _CheckOutputFields(static=self._FIELDS_STATIC,
5529 dynamic=self._FIELDS_DYNAMIC,
5530 selected=self.op.output_fields)
5532 def ExpandNames(self):
5533 self.needed_locks = {}
5535 def Exec(self, feedback_fn):
5536 """Return the requested cluster configuration values.
5540 for field in self.op.output_fields:
5541 if field == "cluster_name":
5542 entry = self.cfg.GetClusterName()
5543 elif field == "master_node":
5544 entry = self.cfg.GetMasterNode()
5545 elif field == "drain_flag":
5546 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5547 elif field == "watcher_pause":
5548 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5549 elif field == "volume_group_name":
5550 entry = self.cfg.GetVGName()
5552 raise errors.ParameterError(field)
5553 values.append(entry)
5557 class LUInstanceActivateDisks(NoHooksLU):
5558 """Bring up an instance's disks.
5563 def ExpandNames(self):
5564 self._ExpandAndLockInstance()
5565 self.needed_locks[locking.LEVEL_NODE] = []
5566 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5568 def DeclareLocks(self, level):
5569 if level == locking.LEVEL_NODE:
5570 self._LockInstancesNodes()
5572 def CheckPrereq(self):
5573 """Check prerequisites.
5575 This checks that the instance is in the cluster.
5578 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5579 assert self.instance is not None, \
5580 "Cannot retrieve locked instance %s" % self.op.instance_name
5581 _CheckNodeOnline(self, self.instance.primary_node)
5583 def Exec(self, feedback_fn):
5584 """Activate the disks.
5587 disks_ok, disks_info = \
5588 _AssembleInstanceDisks(self, self.instance,
5589 ignore_size=self.op.ignore_size)
5591 raise errors.OpExecError("Cannot activate block devices")
5596 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5598 """Prepare the block devices for an instance.
5600 This sets up the block devices on all nodes.
5602 @type lu: L{LogicalUnit}
5603 @param lu: the logical unit on whose behalf we execute
5604 @type instance: L{objects.Instance}
5605 @param instance: the instance for whose disks we assemble
5606 @type disks: list of L{objects.Disk} or None
5607 @param disks: which disks to assemble (or all, if None)
5608 @type ignore_secondaries: boolean
5609 @param ignore_secondaries: if true, errors on secondary nodes
5610 won't result in an error return from the function
5611 @type ignore_size: boolean
5612 @param ignore_size: if true, the current known size of the disk
5613 will not be used during the disk activation, useful for cases
5614 when the size is wrong
5615 @return: False if the operation failed, otherwise a list of
5616 (host, instance_visible_name, node_visible_name)
5617 with the mapping from node devices to instance devices
5622 iname = instance.name
5623 disks = _ExpandCheckDisks(instance, disks)
5625 # With the two-pass mechanism we try to reduce the window of
5626 # opportunity for the race condition of switching DRBD to primary
5627 # before the handshake has occurred, but we do not eliminate it
5629 # The proper fix would be to wait (with some limits) until the
5630 # connection has been made and drbd transitions from WFConnection
5631 # into any other network-connected state (Connected, SyncTarget,
5634 # 1st pass, assemble on all nodes in secondary mode
5635 for idx, inst_disk in enumerate(disks):
5636 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5638 node_disk = node_disk.Copy()
5639 node_disk.UnsetSize()
5640 lu.cfg.SetDiskID(node_disk, node)
5641 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5642 msg = result.fail_msg
5644 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5645 " (is_primary=False, pass=1): %s",
5646 inst_disk.iv_name, node, msg)
5647 if not ignore_secondaries:
5650 # FIXME: race condition on drbd migration to primary
5652 # 2nd pass, do only the primary node
5653 for idx, inst_disk in enumerate(disks):
5656 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5657 if node != instance.primary_node:
5660 node_disk = node_disk.Copy()
5661 node_disk.UnsetSize()
5662 lu.cfg.SetDiskID(node_disk, node)
5663 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5664 msg = result.fail_msg
5666 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5667 " (is_primary=True, pass=2): %s",
5668 inst_disk.iv_name, node, msg)
5671 dev_path = result.payload
5673 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5675 # leave the disks configured for the primary node
5676 # this is a workaround that would be fixed better by
5677 # improving the logical/physical id handling
5679 lu.cfg.SetDiskID(disk, instance.primary_node)
5681 return disks_ok, device_info
5684 def _StartInstanceDisks(lu, instance, force):
5685 """Start the disks of an instance.
5688 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5689 ignore_secondaries=force)
if not disks_ok:
5691 _ShutdownInstanceDisks(lu, instance)
5692 if force is not None and not force:
5693 lu.proc.LogWarning("", hint="If the message above refers to a"
" secondary node,"
5695 " you can retry the operation using '--force'.")
5696 raise errors.OpExecError("Disk consistency error")
5699 class LUInstanceDeactivateDisks(NoHooksLU):
5700 """Shutdown an instance's disks.
5705 def ExpandNames(self):
5706 self._ExpandAndLockInstance()
5707 self.needed_locks[locking.LEVEL_NODE] = []
5708 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5710 def DeclareLocks(self, level):
5711 if level == locking.LEVEL_NODE:
5712 self._LockInstancesNodes()
5714 def CheckPrereq(self):
5715 """Check prerequisites.
5717 This checks that the instance is in the cluster.
5720 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5721 assert self.instance is not None, \
5722 "Cannot retrieve locked instance %s" % self.op.instance_name
5724 def Exec(self, feedback_fn):
5725 """Deactivate the disks
5728 instance = self.instance
if self.op.force:
5730 _ShutdownInstanceDisks(self, instance)
else:
5732 _SafeShutdownInstanceDisks(self, instance)
5735 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5736 """Shutdown block devices of an instance.
5738 This function checks if an instance is running, before calling
5739 _ShutdownInstanceDisks.
5742 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5743 _ShutdownInstanceDisks(lu, instance, disks=disks)
5746 def _ExpandCheckDisks(instance, disks):
5747 """Return the instance disks selected by the disks list
5749 @type disks: list of L{objects.Disk} or None
5750 @param disks: selected disks
5751 @rtype: list of L{objects.Disk}
5752 @return: selected instance disks to act on
if disks is None:
5756 return instance.disks
else:
5758 if not set(disks).issubset(instance.disks):
5759 raise errors.ProgrammerError("Can only act on disks belonging to the"
" target instance")
return disks
5764 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5765 """Shutdown block devices of an instance.
5767 This does the shutdown on all nodes of the instance.
5769 If ignore_primary is false, errors on the primary node are not ignored and cause the shutdown to be reported as failed.
5774 disks = _ExpandCheckDisks(instance, disks)
for disk in disks:
5777 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5778 lu.cfg.SetDiskID(top_disk, node)
5779 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5780 msg = result.fail_msg
5782 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5783 disk.iv_name, node, msg)
5784 if ((node == instance.primary_node and not ignore_primary) or
5785 (node != instance.primary_node and not result.offline)):
5790 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5791 """Checks if a node has enough free memory.
5793 This function checks if a given node has the needed amount of free
5794 memory. If the node has less memory, or if we cannot get the
5795 information from the node, this function raises an OpPrereqError.
5798 @type lu: C{LogicalUnit}
5799 @param lu: a logical unit from which we get configuration data
5801 @param node: the node to check
5802 @type reason: C{str}
5803 @param reason: string to use in the error message
5804 @type requested: C{int}
5805 @param requested: the amount of memory in MiB to check for
5806 @type hypervisor_name: C{str}
5807 @param hypervisor_name: the hypervisor to ask for memory stats
5808 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5809 we cannot check the node
5812 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5813 nodeinfo[node].Raise("Can't get data from node %s" % node,
5814 prereq=True, ecode=errors.ECODE_ENVIRON)
5815 free_mem = nodeinfo[node].payload.get("memory_free", None)
5816 if not isinstance(free_mem, int):
5817 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5818 " was '%s'" % (node, free_mem),
5819 errors.ECODE_ENVIRON)
5820 if requested > free_mem:
5821 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5822 " needed %s MiB, available %s MiB" %
5823 (node, reason, requested, free_mem),
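# Usage sketch (hypothetical numbers): before starting an instance one would
# typically call
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        2048, instance.hypervisor)
# to make sure 2048 MiB are reported as free by the node_info RPC.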
5827 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5828 """Checks if nodes have enough free disk space in the all VGs.
5830 This function check if all given nodes have the needed amount of
5831 free disk. In case any node has less disk or we cannot get the
5832 information from the node, this function raise an OpPrereqError
5835 @type lu: C{LogicalUnit}
5836 @param lu: a logical unit from which we get configuration data
5837 @type nodenames: C{list}
5838 @param nodenames: the list of node names to check
5839 @type req_sizes: C{dict}
5840 @param req_sizes: the hash of vg and corresponding amount of disk in MiB to check for
5842 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5843 or we cannot check the node
5846 for vg, req_size in req_sizes.items():
5847 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
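# Example req_sizes value (hypothetical VG names): {"xenvg": 10240, "data": 512}
# would require 10240 MiB free in VG "xenvg" and 512 MiB in VG "data" on
# every node in nodenames.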
5850 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5851 """Checks if nodes have enough free disk space in the specified VG.
5853 This function checks if all given nodes have the needed amount of
5854 free disk. If any node has less disk, or if we cannot get the
5855 information from the node, this function raises an OpPrereqError.
5858 @type lu: C{LogicalUnit}
5859 @param lu: a logical unit from which we get configuration data
5860 @type nodenames: C{list}
5861 @param nodenames: the list of node names to check
5863 @param vg: the volume group to check
5864 @type requested: C{int}
5865 @param requested: the amount of disk in MiB to check for
5866 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5867 or we cannot check the node
5870 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5871 for node in nodenames:
5872 info = nodeinfo[node]
5873 info.Raise("Cannot get current information from node %s" % node,
5874 prereq=True, ecode=errors.ECODE_ENVIRON)
5875 vg_free = info.payload.get("vg_free", None)
5876 if not isinstance(vg_free, int):
5877 raise errors.OpPrereqError("Can't compute free disk space on node"
5878 " %s for vg %s, result was '%s'" %
5879 (node, vg, vg_free), errors.ECODE_ENVIRON)
5880 if requested > vg_free:
5881 raise errors.OpPrereqError("Not enough disk space on target node %s"
5882 " vg %s: required %d MiB, available %d MiB" %
5883 (node, vg, requested, vg_free),
5887 class LUInstanceStartup(LogicalUnit):
5888 """Starts an instance.
5891 HPATH = "instance-start"
5892 HTYPE = constants.HTYPE_INSTANCE
5895 def CheckArguments(self):
5897 if self.op.beparams:
5898 # fill the beparams dict
5899 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5901 def ExpandNames(self):
5902 self._ExpandAndLockInstance()
5904 def BuildHooksEnv(self):
5907 This runs on master, primary and secondary nodes of the instance.
5911 "FORCE": self.op.force,
5914 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5918 def BuildHooksNodes(self):
5919 """Build hooks nodes.
5922 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5925 def CheckPrereq(self):
5926 """Check prerequisites.
5928 This checks that the instance is in the cluster.
5931 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5932 assert self.instance is not None, \
5933 "Cannot retrieve locked instance %s" % self.op.instance_name
5936 if self.op.hvparams:
5937 # check hypervisor parameter syntax (locally)
5938 cluster = self.cfg.GetClusterInfo()
5939 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5940 filled_hvp = cluster.FillHV(instance)
5941 filled_hvp.update(self.op.hvparams)
5942 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5943 hv_type.CheckParameterSyntax(filled_hvp)
5944 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
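# Sketch of the merge above (hypothetical parameter): the cluster defaults
# from FillHV() are overlaid with self.op.hvparams, e.g. {"kernel_args": "ro"},
# and the combined dict is then syntax-checked and verified on the nodes.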
5946 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5948 if self.primary_offline and self.op.ignore_offline_nodes:
5949 self.proc.LogWarning("Ignoring offline primary node")
5951 if self.op.hvparams or self.op.beparams:
5952 self.proc.LogWarning("Overridden parameters are ignored")
5954 _CheckNodeOnline(self, instance.primary_node)
5956 bep = self.cfg.GetClusterInfo().FillBE(instance)
5958 # check bridges existence
5959 _CheckInstanceBridgesExist(self, instance)
5961 remote_info = self.rpc.call_instance_info(instance.primary_node,
5963 instance.hypervisor)
5964 remote_info.Raise("Error checking node %s" % instance.primary_node,
5965 prereq=True, ecode=errors.ECODE_ENVIRON)
5966 if not remote_info.payload: # not running already
5967 _CheckNodeFreeMemory(self, instance.primary_node,
5968 "starting instance %s" % instance.name,
5969 bep[constants.BE_MEMORY], instance.hypervisor)
5971 def Exec(self, feedback_fn):
5972 """Start the instance.
5975 instance = self.instance
5976 force = self.op.force
5978 if not self.op.no_remember:
5979 self.cfg.MarkInstanceUp(instance.name)
5981 if self.primary_offline:
5982 assert self.op.ignore_offline_nodes
5983 self.proc.LogInfo("Primary node offline, marked instance as started")
5985 node_current = instance.primary_node
5987 _StartInstanceDisks(self, instance, force)
5989 result = self.rpc.call_instance_start(node_current, instance,
5990 self.op.hvparams, self.op.beparams,
5991 self.op.startup_paused)
5992 msg = result.fail_msg
5994 _ShutdownInstanceDisks(self, instance)
5995 raise errors.OpExecError("Could not start instance: %s" % msg)
5998 class LUInstanceReboot(LogicalUnit):
5999 """Reboot an instance.
6002 HPATH = "instance-reboot"
6003 HTYPE = constants.HTYPE_INSTANCE
6006 def ExpandNames(self):
6007 self._ExpandAndLockInstance()
6009 def BuildHooksEnv(self):
6012 This runs on master, primary and secondary nodes of the instance.
6016 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6017 "REBOOT_TYPE": self.op.reboot_type,
6018 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6021 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6025 def BuildHooksNodes(self):
6026 """Build hooks nodes.
6029 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6032 def CheckPrereq(self):
6033 """Check prerequisites.
6035 This checks that the instance is in the cluster.
6038 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6039 assert self.instance is not None, \
6040 "Cannot retrieve locked instance %s" % self.op.instance_name
6042 _CheckNodeOnline(self, instance.primary_node)
6044 # check bridges existence
6045 _CheckInstanceBridgesExist(self, instance)
6047 def Exec(self, feedback_fn):
6048 """Reboot the instance.
6051 instance = self.instance
6052 ignore_secondaries = self.op.ignore_secondaries
6053 reboot_type = self.op.reboot_type
6055 remote_info = self.rpc.call_instance_info(instance.primary_node,
6057 instance.hypervisor)
6058 remote_info.Raise("Error checking node %s" % instance.primary_node)
6059 instance_running = bool(remote_info.payload)
6061 node_current = instance.primary_node
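# Soft and hard reboots are handed to the hypervisor on the primary node;
# a full reboot (or a reboot of a stopped instance) is emulated below by a
# shutdown followed by a fresh start.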
6063 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6064 constants.INSTANCE_REBOOT_HARD]:
6065 for disk in instance.disks:
6066 self.cfg.SetDiskID(disk, node_current)
6067 result = self.rpc.call_instance_reboot(node_current, instance,
6069 self.op.shutdown_timeout)
6070 result.Raise("Could not reboot instance")
6072 if instance_running:
6073 result = self.rpc.call_instance_shutdown(node_current, instance,
6074 self.op.shutdown_timeout)
6075 result.Raise("Could not shutdown instance for full reboot")
6076 _ShutdownInstanceDisks(self, instance)
else:
6078 self.LogInfo("Instance %s was already stopped, starting now",
instance.name)
6080 _StartInstanceDisks(self, instance, ignore_secondaries)
6081 result = self.rpc.call_instance_start(node_current, instance,
6083 msg = result.fail_msg
6085 _ShutdownInstanceDisks(self, instance)
6086 raise errors.OpExecError("Could not start instance for"
6087 " full reboot: %s" % msg)
6089 self.cfg.MarkInstanceUp(instance.name)
6092 class LUInstanceShutdown(LogicalUnit):
6093 """Shutdown an instance.
6096 HPATH = "instance-stop"
6097 HTYPE = constants.HTYPE_INSTANCE
6100 def ExpandNames(self):
6101 self._ExpandAndLockInstance()
6103 def BuildHooksEnv(self):
6106 This runs on master, primary and secondary nodes of the instance.
6109 env = _BuildInstanceHookEnvByObject(self, self.instance)
6110 env["TIMEOUT"] = self.op.timeout
6113 def BuildHooksNodes(self):
6114 """Build hooks nodes.
6117 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6120 def CheckPrereq(self):
6121 """Check prerequisites.
6123 This checks that the instance is in the cluster.
6126 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6127 assert self.instance is not None, \
6128 "Cannot retrieve locked instance %s" % self.op.instance_name
6130 self.primary_offline = \
6131 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6133 if self.primary_offline and self.op.ignore_offline_nodes:
6134 self.proc.LogWarning("Ignoring offline primary node")
6136 _CheckNodeOnline(self, self.instance.primary_node)
6138 def Exec(self, feedback_fn):
6139 """Shutdown the instance.
6142 instance = self.instance
6143 node_current = instance.primary_node
6144 timeout = self.op.timeout
6146 if not self.op.no_remember:
6147 self.cfg.MarkInstanceDown(instance.name)
6149 if self.primary_offline:
6150 assert self.op.ignore_offline_nodes
6151 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6153 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6154 msg = result.fail_msg
6156 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6158 _ShutdownInstanceDisks(self, instance)
6161 class LUInstanceReinstall(LogicalUnit):
6162 """Reinstall an instance.
6165 HPATH = "instance-reinstall"
6166 HTYPE = constants.HTYPE_INSTANCE
6169 def ExpandNames(self):
6170 self._ExpandAndLockInstance()
6172 def BuildHooksEnv(self):
6175 This runs on master, primary and secondary nodes of the instance.
6178 return _BuildInstanceHookEnvByObject(self, self.instance)
6180 def BuildHooksNodes(self):
6181 """Build hooks nodes.
6184 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6187 def CheckPrereq(self):
6188 """Check prerequisites.
6190 This checks that the instance is in the cluster and is not running.
6193 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6194 assert instance is not None, \
6195 "Cannot retrieve locked instance %s" % self.op.instance_name
6196 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6197 " offline, cannot reinstall")
6198 for node in instance.secondary_nodes:
6199 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6200 " cannot reinstall")
6202 if instance.disk_template == constants.DT_DISKLESS:
6203 raise errors.OpPrereqError("Instance '%s' has no disks" %
6204 self.op.instance_name,
6206 _CheckInstanceDown(self, instance, "cannot reinstall")
6208 if self.op.os_type is not None:
6210 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6211 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6212 instance_os = self.op.os_type
6214 instance_os = instance.os
6216 nodelist = list(instance.all_nodes)
6218 if self.op.osparams:
6219 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6220 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6221 self.os_inst = i_osdict # the new dict (without defaults)
6225 self.instance = instance
6227 def Exec(self, feedback_fn):
6228 """Reinstall the instance.
6231 inst = self.instance
6233 if self.op.os_type is not None:
6234 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6235 inst.os = self.op.os_type
6236 # Write to configuration
6237 self.cfg.Update(inst, feedback_fn)
6239 _StartInstanceDisks(self, inst, None)
6241 feedback_fn("Running the instance OS create scripts...")
6242 # FIXME: pass debug option from opcode to backend
6243 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6244 self.op.debug_level,
6245 osparams=self.os_inst)
6246 result.Raise("Could not install OS for instance %s on node %s" %
6247 (inst.name, inst.primary_node))
6249 _ShutdownInstanceDisks(self, inst)
6252 class LUInstanceRecreateDisks(LogicalUnit):
6253 """Recreate an instance's missing disks.
6256 HPATH = "instance-recreate-disks"
6257 HTYPE = constants.HTYPE_INSTANCE
6260 def CheckArguments(self):
6261 # normalise the disk list
6262 self.op.disks = sorted(frozenset(self.op.disks))
6264 def ExpandNames(self):
6265 self._ExpandAndLockInstance()
6266 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6268 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6269 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6271 self.needed_locks[locking.LEVEL_NODE] = []
6273 def DeclareLocks(self, level):
6274 if level == locking.LEVEL_NODE:
6275 # if we replace the nodes, we only need to lock the old primary,
6276 # otherwise we need to lock all nodes for disk re-creation
6277 primary_only = bool(self.op.nodes)
6278 self._LockInstancesNodes(primary_only=primary_only)
6280 def BuildHooksEnv(self):
6283 This runs on master, primary and secondary nodes of the instance.
6286 return _BuildInstanceHookEnvByObject(self, self.instance)
6288 def BuildHooksNodes(self):
6289 """Build hooks nodes.
6292 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6295 def CheckPrereq(self):
6296 """Check prerequisites.
6298 This checks that the instance is in the cluster and is not running.
6301 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6302 assert instance is not None, \
6303 "Cannot retrieve locked instance %s" % self.op.instance_name
6305 if len(self.op.nodes) != len(instance.all_nodes):
6306 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6307 " %d replacement nodes were specified" %
6308 (instance.name, len(instance.all_nodes),
6309 len(self.op.nodes)),
6311 assert instance.disk_template != constants.DT_DRBD8 or \
6312 len(self.op.nodes) == 2
6313 assert instance.disk_template != constants.DT_PLAIN or \
6314 len(self.op.nodes) == 1
6315 primary_node = self.op.nodes[0]
6317 primary_node = instance.primary_node
6318 _CheckNodeOnline(self, primary_node)
6320 if instance.disk_template == constants.DT_DISKLESS:
6321 raise errors.OpPrereqError("Instance '%s' has no disks" %
6322 self.op.instance_name, errors.ECODE_INVAL)
6323 # if we replace nodes *and* the old primary is offline, we don't
6325 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6326 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6327 if not (self.op.nodes and old_pnode.offline):
6328 _CheckInstanceDown(self, instance, "cannot recreate disks")
6330 if not self.op.disks:
6331 self.op.disks = range(len(instance.disks))
6333 for idx in self.op.disks:
6334 if idx >= len(instance.disks):
6335 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6337 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6338 raise errors.OpPrereqError("Can't recreate disks partially and"
6339 " change the nodes at the same time",
6341 self.instance = instance
6343 def Exec(self, feedback_fn):
6344 """Recreate the disks.
6347 instance = self.instance
to_skip = []
6350 mods = [] # keeps track of needed logical_id changes
6352 for idx, disk in enumerate(instance.disks):
6353 if idx not in self.op.disks: # disk idx has not been passed in
to_skip.append(idx)
continue
6356 # update secondaries for disks, if needed
6358 if disk.dev_type == constants.LD_DRBD8:
6359 # need to update the nodes and minors
6360 assert len(self.op.nodes) == 2
6361 assert len(disk.logical_id) == 6 # otherwise disk internals
6363 (_, _, old_port, _, _, old_secret) = disk.logical_id
6364 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6365 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6366 new_minors[0], new_minors[1], old_secret)
6367 assert len(disk.logical_id) == len(new_id)
6368 mods.append((idx, new_id))
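# Shape of a DRBD8 logical_id, with purely illustrative values:
#   ("node1.example.com", "node2.example.com", 11000, 0, 1, "secret")
# i.e. (primary, secondary, port, minor_on_primary, minor_on_secondary,
# shared_secret); only the nodes and the minors are replaced here.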
6370 # now that we have passed all asserts above, we can apply the mods
6371 # in a single run (to avoid partial changes)
6372 for idx, new_id in mods:
6373 instance.disks[idx].logical_id = new_id
6375 # change primary node, if needed
if self.op.nodes:
6377 instance.primary_node = self.op.nodes[0]
6378 self.LogWarning("Changing the instance's nodes, you will have to"
6379 " remove any disks left on the older nodes manually")
6382 self.cfg.Update(instance, feedback_fn)
6384 _CreateDisks(self, instance, to_skip=to_skip)
6387 class LUInstanceRename(LogicalUnit):
6388 """Rename an instance.
6391 HPATH = "instance-rename"
6392 HTYPE = constants.HTYPE_INSTANCE
6394 def CheckArguments(self):
6398 if self.op.ip_check and not self.op.name_check:
6399 # TODO: make the ip check more flexible and not depend on the name check
6400 raise errors.OpPrereqError("IP address check requires a name check",
6403 def BuildHooksEnv(self):
6406 This runs on master, primary and secondary nodes of the instance.
6409 env = _BuildInstanceHookEnvByObject(self, self.instance)
6410 env["INSTANCE_NEW_NAME"] = self.op.new_name
6413 def BuildHooksNodes(self):
6414 """Build hooks nodes.
6417 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6420 def CheckPrereq(self):
6421 """Check prerequisites.
6423 This checks that the instance is in the cluster and is not running.
6426 self.op.instance_name = _ExpandInstanceName(self.cfg,
6427 self.op.instance_name)
6428 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6429 assert instance is not None
6430 _CheckNodeOnline(self, instance.primary_node)
6431 _CheckInstanceDown(self, instance, "cannot rename")
6432 self.instance = instance
6434 new_name = self.op.new_name
6435 if self.op.name_check:
6436 hostname = netutils.GetHostname(name=new_name)
6437 if hostname != new_name:
6438 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6440 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6441 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6442 " same as given hostname '%s'") %
6443 (hostname.name, self.op.new_name),
6445 new_name = self.op.new_name = hostname.name
6446 if (self.op.ip_check and
6447 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6448 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6449 (hostname.ip, new_name),
6450 errors.ECODE_NOTUNIQUE)
6452 instance_list = self.cfg.GetInstanceList()
6453 if new_name in instance_list and new_name != instance.name:
6454 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6455 new_name, errors.ECODE_EXISTS)
6457 def Exec(self, feedback_fn):
6458 """Rename the instance.
6461 inst = self.instance
6462 old_name = inst.name
6464 rename_file_storage = False
6465 if (inst.disk_template in constants.DTS_FILEBASED and
6466 self.op.new_name != inst.name):
6467 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6468 rename_file_storage = True
6470 self.cfg.RenameInstance(inst.name, self.op.new_name)
6471 # Change the instance lock. This is definitely safe while we hold the BGL.
6472 # Otherwise the new lock would have to be added in acquired mode.
6474 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6475 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6477 # re-read the instance from the configuration after rename
6478 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6480 if rename_file_storage:
6481 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6482 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6483 old_file_storage_dir,
6484 new_file_storage_dir)
6485 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6486 " (but the instance has been renamed in Ganeti)" %
6487 (inst.primary_node, old_file_storage_dir,
6488 new_file_storage_dir))
6490 _StartInstanceDisks(self, inst, None)
6492 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6493 old_name, self.op.debug_level)
6494 msg = result.fail_msg
6496 msg = ("Could not run OS rename script for instance %s on node %s"
6497 " (but the instance has been renamed in Ganeti): %s" %
6498 (inst.name, inst.primary_node, msg))
6499 self.proc.LogWarning(msg)
6501 _ShutdownInstanceDisks(self, inst)
6506 class LUInstanceRemove(LogicalUnit):
6507 """Remove an instance.
6510 HPATH = "instance-remove"
6511 HTYPE = constants.HTYPE_INSTANCE
6514 def ExpandNames(self):
6515 self._ExpandAndLockInstance()
6516 self.needed_locks[locking.LEVEL_NODE] = []
6517 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6519 def DeclareLocks(self, level):
6520 if level == locking.LEVEL_NODE:
6521 self._LockInstancesNodes()
6523 def BuildHooksEnv(self):
6526 This runs on master, primary and secondary nodes of the instance.
6529 env = _BuildInstanceHookEnvByObject(self, self.instance)
6530 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6533 def BuildHooksNodes(self):
6534 """Build hooks nodes.
6537 nl = [self.cfg.GetMasterNode()]
6538 nl_post = list(self.instance.all_nodes) + nl
6539 return (nl, nl_post)
6541 def CheckPrereq(self):
6542 """Check prerequisites.
6544 This checks that the instance is in the cluster.
6547 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6548 assert self.instance is not None, \
6549 "Cannot retrieve locked instance %s" % self.op.instance_name
6551 def Exec(self, feedback_fn):
6552 """Remove the instance.
6555 instance = self.instance
6556 logging.info("Shutting down instance %s on node %s",
6557 instance.name, instance.primary_node)
6559 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6560 self.op.shutdown_timeout)
6561 msg = result.fail_msg
6563 if self.op.ignore_failures:
6564 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6566 raise errors.OpExecError("Could not shutdown instance %s on"
6568 (instance.name, instance.primary_node, msg))
6570 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6573 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6574 """Utility function to remove an instance.
6577 logging.info("Removing block devices for instance %s", instance.name)
6579 if not _RemoveDisks(lu, instance):
6580 if not ignore_failures:
6581 raise errors.OpExecError("Can't remove instance's disks")
6582 feedback_fn("Warning: can't remove instance's disks")
6584 logging.info("Removing instance %s out of cluster config", instance.name)
6586 lu.cfg.RemoveInstance(instance.name)
6588 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6589 "Instance lock removal conflict"
6591 # Remove lock for the instance
6592 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6595 class LUInstanceQuery(NoHooksLU):
6596 """Logical unit for querying instances.
6599 # pylint: disable=W0142
6602 def CheckArguments(self):
6603 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6604 self.op.output_fields, self.op.use_locking)
6606 def ExpandNames(self):
6607 self.iq.ExpandNames(self)
6609 def DeclareLocks(self, level):
6610 self.iq.DeclareLocks(self, level)
6612 def Exec(self, feedback_fn):
6613 return self.iq.OldStyleQuery(self)
6616 class LUInstanceFailover(LogicalUnit):
6617 """Failover an instance.
6620 HPATH = "instance-failover"
6621 HTYPE = constants.HTYPE_INSTANCE
6624 def CheckArguments(self):
6625 """Check the arguments.
6628 self.iallocator = getattr(self.op, "iallocator", None)
6629 self.target_node = getattr(self.op, "target_node", None)
6631 def ExpandNames(self):
6632 self._ExpandAndLockInstance()
6634 if self.op.target_node is not None:
6635 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6637 self.needed_locks[locking.LEVEL_NODE] = []
6638 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6640 ignore_consistency = self.op.ignore_consistency
6641 shutdown_timeout = self.op.shutdown_timeout
6642 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6645 ignore_consistency=ignore_consistency,
6646 shutdown_timeout=shutdown_timeout)
6647 self.tasklets = [self._migrater]
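# The heavy lifting is delegated to the TLMigrateInstance tasklet defined
# further down; this LU only sets up locking, hooks and the
# failover-specific parameters.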
6649 def DeclareLocks(self, level):
6650 if level == locking.LEVEL_NODE:
6651 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6652 if instance.disk_template in constants.DTS_EXT_MIRROR:
6653 if self.op.target_node is None:
6654 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6656 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6657 self.op.target_node]
6658 del self.recalculate_locks[locking.LEVEL_NODE]
6660 self._LockInstancesNodes()
6662 def BuildHooksEnv(self):
6665 This runs on master, primary and secondary nodes of the instance.
6668 instance = self._migrater.instance
6669 source_node = instance.primary_node
6670 target_node = self.op.target_node
6672 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6673 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6674 "OLD_PRIMARY": source_node,
6675 "NEW_PRIMARY": target_node,
6678 if instance.disk_template in constants.DTS_INT_MIRROR:
6679 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6680 env["NEW_SECONDARY"] = source_node
6682 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6684 env.update(_BuildInstanceHookEnvByObject(self, instance))
6688 def BuildHooksNodes(self):
6689 """Build hooks nodes.
6692 instance = self._migrater.instance
6693 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6694 return (nl, nl + [instance.primary_node])
6697 class LUInstanceMigrate(LogicalUnit):
6698 """Migrate an instance.
6700 This is migration without shutting the instance down, as opposed to
6701 failover, which first shuts the instance down.
6704 HPATH = "instance-migrate"
6705 HTYPE = constants.HTYPE_INSTANCE
6708 def ExpandNames(self):
6709 self._ExpandAndLockInstance()
6711 if self.op.target_node is not None:
6712 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6714 self.needed_locks[locking.LEVEL_NODE] = []
6715 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6717 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6718 cleanup=self.op.cleanup,
6720 fallback=self.op.allow_failover)
6721 self.tasklets = [self._migrater]
6723 def DeclareLocks(self, level):
6724 if level == locking.LEVEL_NODE:
6725 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6726 if instance.disk_template in constants.DTS_EXT_MIRROR:
6727 if self.op.target_node is None:
6728 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6730 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6731 self.op.target_node]
6732 del self.recalculate_locks[locking.LEVEL_NODE]
6734 self._LockInstancesNodes()
6736 def BuildHooksEnv(self):
6739 This runs on master, primary and secondary nodes of the instance.
6742 instance = self._migrater.instance
6743 source_node = instance.primary_node
6744 target_node = self.op.target_node
6745 env = _BuildInstanceHookEnvByObject(self, instance)
6747 "MIGRATE_LIVE": self._migrater.live,
6748 "MIGRATE_CLEANUP": self.op.cleanup,
6749 "OLD_PRIMARY": source_node,
6750 "NEW_PRIMARY": target_node,
6753 if instance.disk_template in constants.DTS_INT_MIRROR:
6754 env["OLD_SECONDARY"] = target_node
6755 env["NEW_SECONDARY"] = source_node
6757 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6761 def BuildHooksNodes(self):
6762 """Build hooks nodes.
6765 instance = self._migrater.instance
6766 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6767 return (nl, nl + [instance.primary_node])
6770 class LUInstanceMove(LogicalUnit):
6771 """Move an instance by data-copying.
6774 HPATH = "instance-move"
6775 HTYPE = constants.HTYPE_INSTANCE
6778 def ExpandNames(self):
6779 self._ExpandAndLockInstance()
6780 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6781 self.op.target_node = target_node
6782 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6783 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6785 def DeclareLocks(self, level):
6786 if level == locking.LEVEL_NODE:
6787 self._LockInstancesNodes(primary_only=True)
6789 def BuildHooksEnv(self):
6792 This runs on master, primary and secondary nodes of the instance.
6796 "TARGET_NODE": self.op.target_node,
6797 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6799 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6802 def BuildHooksNodes(self):
6803 """Build hooks nodes.
6807 self.cfg.GetMasterNode(),
6808 self.instance.primary_node,
6809 self.op.target_node,
6813 def CheckPrereq(self):
6814 """Check prerequisites.
6816 This checks that the instance is in the cluster.
6819 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6820 assert self.instance is not None, \
6821 "Cannot retrieve locked instance %s" % self.op.instance_name
6823 node = self.cfg.GetNodeInfo(self.op.target_node)
6824 assert node is not None, \
6825 "Cannot retrieve locked node %s" % self.op.target_node
6827 self.target_node = target_node = node.name
6829 if target_node == instance.primary_node:
6830 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6831 (instance.name, target_node),
6834 bep = self.cfg.GetClusterInfo().FillBE(instance)
6836 for idx, dsk in enumerate(instance.disks):
6837 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6838 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6839 " cannot copy" % idx, errors.ECODE_STATE)
6841 _CheckNodeOnline(self, target_node)
6842 _CheckNodeNotDrained(self, target_node)
6843 _CheckNodeVmCapable(self, target_node)
6845 if instance.admin_up:
6846 # check memory requirements on the secondary node
6847 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6848 instance.name, bep[constants.BE_MEMORY],
6849 instance.hypervisor)
6851 self.LogInfo("Not checking memory on the secondary node as"
6852 " instance will not be started")
6854 # check bridge existence
6855 _CheckInstanceBridgesExist(self, instance, node=target_node)
6857 def Exec(self, feedback_fn):
6858 """Move an instance.
6860 The move is done by shutting it down on its present node, copying
6861 the data over (slow) and starting it on the new node.
6864 instance = self.instance
6866 source_node = instance.primary_node
6867 target_node = self.target_node
6869 self.LogInfo("Shutting down instance %s on source node %s",
6870 instance.name, source_node)
6872 result = self.rpc.call_instance_shutdown(source_node, instance,
6873 self.op.shutdown_timeout)
6874 msg = result.fail_msg
6876 if self.op.ignore_consistency:
6877 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6878 " Proceeding anyway. Please make sure node"
6879 " %s is down. Error details: %s",
6880 instance.name, source_node, source_node, msg)
6882 raise errors.OpExecError("Could not shutdown instance %s on"
6884 (instance.name, source_node, msg))
6886 # create the target disks
6888 _CreateDisks(self, instance, target_node=target_node)
6889 except errors.OpExecError:
6890 self.LogWarning("Device creation failed, reverting...")
6892 _RemoveDisks(self, instance, target_node=target_node)
6894 self.cfg.ReleaseDRBDMinors(instance.name)
6897 cluster_name = self.cfg.GetClusterInfo().cluster_name
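# What follows is the actual data move: each disk is assembled on the
# target node, its data is streamed from the source node via
# call_blockdev_export, and any failure is recorded in errs so that the
# freshly created disks can be rolled back.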
6900 # activate, get path, copy the data over
6901 for idx, disk in enumerate(instance.disks):
6902 self.LogInfo("Copying data for disk %d", idx)
6903 result = self.rpc.call_blockdev_assemble(target_node, disk,
6904 instance.name, True, idx)
6906 self.LogWarning("Can't assemble newly created disk %d: %s",
6907 idx, result.fail_msg)
6908 errs.append(result.fail_msg)
6910 dev_path = result.payload
6911 result = self.rpc.call_blockdev_export(source_node, disk,
6912 target_node, dev_path,
6915 self.LogWarning("Can't copy data over for disk %d: %s",
6916 idx, result.fail_msg)
6917 errs.append(result.fail_msg)
6921 self.LogWarning("Some disks failed to copy, aborting")
6923 _RemoveDisks(self, instance, target_node=target_node)
6925 self.cfg.ReleaseDRBDMinors(instance.name)
6926 raise errors.OpExecError("Errors during disk copy: %s" %
6929 instance.primary_node = target_node
6930 self.cfg.Update(instance, feedback_fn)
6932 self.LogInfo("Removing the disks on the original node")
6933 _RemoveDisks(self, instance, target_node=source_node)
6935 # Only start the instance if it's marked as up
6936 if instance.admin_up:
6937 self.LogInfo("Starting instance %s on node %s",
6938 instance.name, target_node)
6940 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6941 ignore_secondaries=True)
6943 _ShutdownInstanceDisks(self, instance)
6944 raise errors.OpExecError("Can't activate the instance's disks")
6946 result = self.rpc.call_instance_start(target_node, instance,
6948 msg = result.fail_msg
6950 _ShutdownInstanceDisks(self, instance)
6951 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6952 (instance.name, target_node, msg))
6955 class LUNodeMigrate(LogicalUnit):
6956 """Migrate all instances from a node.
6959 HPATH = "node-migrate"
6960 HTYPE = constants.HTYPE_NODE
6963 def CheckArguments(self):
6966 def ExpandNames(self):
6967 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6969 self.share_locks = _ShareAll()
6970 self.needed_locks = {
6971 locking.LEVEL_NODE: [self.op.node_name],
6974 def BuildHooksEnv(self):
6977 This runs on the master, the primary and all the secondaries.
6981 "NODE_NAME": self.op.node_name,
6984 def BuildHooksNodes(self):
6985 """Build hooks nodes.
6988 nl = [self.cfg.GetMasterNode()]
6991 def CheckPrereq(self):
6994 def Exec(self, feedback_fn):
6995 # Prepare jobs for migration instances
6997 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7000 iallocator=self.op.iallocator,
7001 target_node=self.op.target_node)]
7002 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7005 # TODO: Run iallocator in this opcode and pass correct placement options to
7006 # OpInstanceMigrate. Since other jobs can modify the cluster between
7007 # running the iallocator and the actual migration, a good consistency model
7008 # will have to be found.
7010 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7011 frozenset([self.op.node_name]))
7013 return ResultWithJobs(jobs)
7016 class TLMigrateInstance(Tasklet):
7017 """Tasklet class for instance migration.
7020 @ivar live: whether the migration will be done live or non-live;
7021 this variable is initialized only after CheckPrereq has run
7022 @type cleanup: boolean
7023 @ivar cleanup: Whether we are cleaning up from a failed migration
7024 @type iallocator: string
7025 @ivar iallocator: The iallocator used to determine target_node
7026 @type target_node: string
7027 @ivar target_node: If given, the target_node to reallocate the instance to
7028 @type failover: boolean
7029 @ivar failover: Whether operation results in failover or migration
7030 @type fallback: boolean
7031 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
7033 @type ignore_consistency: boolean
7034 @ivar ignore_consistency: Whether we should ignore consistency between source and target nodes
7036 @type shutdown_timeout: int
7037 @ivar shutdown_timeout: In case of failover, the timeout to use for the shutdown
7040 def __init__(self, lu, instance_name, cleanup=False,
7041 failover=False, fallback=False,
7042 ignore_consistency=False,
7043 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7044 """Initializes this class.
7047 Tasklet.__init__(self, lu)
7050 self.instance_name = instance_name
7051 self.cleanup = cleanup
7052 self.live = False # will be overridden later
7053 self.failover = failover
7054 self.fallback = fallback
7055 self.ignore_consistency = ignore_consistency
7056 self.shutdown_timeout = shutdown_timeout
7058 def CheckPrereq(self):
7059 """Check prerequisites.
7061 This checks that the instance is in the cluster.
7064 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7065 instance = self.cfg.GetInstanceInfo(instance_name)
7066 assert instance is not None
7067 self.instance = instance
7069 if (not self.cleanup and not instance.admin_up and not self.failover and
self.fallback):
7071 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
" to failover")
7073 self.failover = True
7075 if instance.disk_template not in constants.DTS_MIRRORED:
if self.failover:
text = "failover"
else:
text = "migration"
7080 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7081 " %s" % (instance.disk_template, text),
7084 if instance.disk_template in constants.DTS_EXT_MIRROR:
7085 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7087 if self.lu.op.iallocator:
7088 self._RunAllocator()
7090 # We set self.target_node, as it is required by
7092 self.target_node = self.lu.op.target_node
7094 # self.target_node is already populated, either directly or by the
7096 target_node = self.target_node
7097 if self.target_node == instance.primary_node:
7098 raise errors.OpPrereqError("Cannot migrate instance %s"
7099 " to its primary (%s)" %
7100 (instance.name, instance.primary_node))
7102 if len(self.lu.tasklets) == 1:
7103 # It is safe to release locks only when we're the only tasklet
7105 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7106 keep=[instance.primary_node, self.target_node])
7109 secondary_nodes = instance.secondary_nodes
7110 if not secondary_nodes:
7111 raise errors.ConfigurationError("No secondary node but using"
7112 " %s disk template" %
7113 instance.disk_template)
7114 target_node = secondary_nodes[0]
7115 if self.lu.op.iallocator or (self.lu.op.target_node and
7116 self.lu.op.target_node != target_node):
7118 text = "failed over"
7121 raise errors.OpPrereqError("Instances with disk template %s cannot"
7122 " be %s to arbitrary nodes"
7123 " (neither an iallocator nor a target"
7124 " node can be passed)" %
7125 (instance.disk_template, text),
7128 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7130 # check memory requirements on the secondary node
7131 if not self.failover or instance.admin_up:
7132 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7133 instance.name, i_be[constants.BE_MEMORY],
7134 instance.hypervisor)
7136 self.lu.LogInfo("Not checking memory on the secondary node as"
7137 " instance will not be started")
7139 # check bridge existence
7140 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7142 if not self.cleanup:
7143 _CheckNodeNotDrained(self.lu, target_node)
7144 if not self.failover:
7145 result = self.rpc.call_instance_migratable(instance.primary_node,
7147 if result.fail_msg and self.fallback:
7148 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7150 self.failover = True
7152 result.Raise("Can't migrate, please use failover",
7153 prereq=True, ecode=errors.ECODE_STATE)
7155 assert not (self.failover and self.cleanup)
7157 if not self.failover:
7158 if self.lu.op.live is not None and self.lu.op.mode is not None:
7159 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7160 " parameters are accepted",
7162 if self.lu.op.live is not None:
7164 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7166 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7167 # reset the 'live' parameter to None so that repeated
7168 # invocations of CheckPrereq do not raise an exception
7169 self.lu.op.live = None
7170 elif self.lu.op.mode is None:
7171 # read the default value from the hypervisor
7172 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7174 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7176 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7178 # Failover is never live
7181 def _RunAllocator(self):
7182 """Run the allocator based on input opcode.
7185 ial = IAllocator(self.cfg, self.rpc,
7186 mode=constants.IALLOCATOR_MODE_RELOC,
7187 name=self.instance_name,
7188 # TODO See why hail breaks with a single node below
7189 relocate_from=[self.instance.primary_node,
7190 self.instance.primary_node],
7193 ial.Run(self.lu.op.iallocator)
7196 raise errors.OpPrereqError("Can't compute nodes using"
7197 " iallocator '%s': %s" %
7198 (self.lu.op.iallocator, ial.info),
7200 if len(ial.result) != ial.required_nodes:
7201 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7202 " of nodes (%s), required %s" %
7203 (self.lu.op.iallocator, len(ial.result),
7204 ial.required_nodes), errors.ECODE_FAULT)
7205 self.target_node = ial.result[0]
7206 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7207 self.instance_name, self.lu.op.iallocator,
7208 utils.CommaJoin(ial.result))
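# Illustrative outcome (hypothetical node name): a successful relocation
# request yields something like ial.result == ["node3.example.com"], whose
# first entry becomes self.target_node.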
7210 def _WaitUntilSync(self):
7211 """Poll with custom rpc for disk sync.
7213 This uses our own step-based rpc call.
7216 self.feedback_fn("* wait until resync is done")
7220 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7222 self.instance.disks)
7224 for node, nres in result.items():
7225 nres.Raise("Cannot resync disks on node %s" % node)
7226 node_done, node_percent = nres.payload
7227 all_done = all_done and node_done
7228 if node_percent is not None:
7229 min_percent = min(min_percent, node_percent)
7231 if min_percent < 100:
7232 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7235 def _EnsureSecondary(self, node):
7236 """Demote a node to secondary.
7239 self.feedback_fn("* switching node %s to secondary mode" % node)
7241 for dev in self.instance.disks:
7242 self.cfg.SetDiskID(dev, node)
7244 result = self.rpc.call_blockdev_close(node, self.instance.name,
7245 self.instance.disks)
7246 result.Raise("Cannot change disk to secondary on node %s" % node)
7248 def _GoStandalone(self):
7249 """Disconnect from the network.
7252 self.feedback_fn("* changing into standalone mode")
7253 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7254 self.instance.disks)
7255 for node, nres in result.items():
7256 nres.Raise("Cannot disconnect disks node %s" % node)
7258 def _GoReconnect(self, multimaster):
7259 """Reconnect to the network.
7265 msg = "single-master"
7266 self.feedback_fn("* changing disks into %s mode" % msg)
7267 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7268 self.instance.disks,
7269 self.instance.name, multimaster)
7270 for node, nres in result.items():
7271 nres.Raise("Cannot change disks config on node %s" % node)
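# Together, _EnsureSecondary, _GoStandalone and _GoReconnect implement the
# DRBD reconfiguration sequence used below: close the devices on one side,
# drop the network connection, then re-attach in either single-master or
# multimaster mode as required by the migration phase.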
7273 def _ExecCleanup(self):
7274 """Try to cleanup after a failed migration.
7276 The cleanup is done by:
7277 - check that the instance is running only on one node
7278 (and update the config if needed)
7279 - change disks on its secondary node to secondary
7280 - wait until disks are fully synchronized
7281 - disconnect from the network
7282 - change disks into single-master mode
7283 - wait again until disks are fully synchronized
7286 instance = self.instance
7287 target_node = self.target_node
7288 source_node = self.source_node
7290 # check running on only one node
7291 self.feedback_fn("* checking where the instance actually runs"
7292 " (if this hangs, the hypervisor might be in"
7294 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7295 for node, result in ins_l.items():
7296 result.Raise("Can't contact node %s" % node)
7298 runningon_source = instance.name in ins_l[source_node].payload
7299 runningon_target = instance.name in ins_l[target_node].payload
7301 if runningon_source and runningon_target:
7302 raise errors.OpExecError("Instance seems to be running on two nodes,"
7303 " or the hypervisor is confused; you will have"
7304 " to ensure manually that it runs only on one"
7305 " and restart this operation")
7307 if not (runningon_source or runningon_target):
7308 raise errors.OpExecError("Instance does not seem to be running at all;"
7309 " in this case it's safer to repair by"
7310 " running 'gnt-instance stop' to ensure disk"
7311 " shutdown, and then restarting it")
7313 if runningon_target:
7314 # the migration has actually succeeded, we need to update the config
7315 self.feedback_fn("* instance running on secondary node (%s),"
7316 " updating config" % target_node)
7317 instance.primary_node = target_node
7318 self.cfg.Update(instance, self.feedback_fn)
7319 demoted_node = source_node
7321 self.feedback_fn("* instance confirmed to be running on its"
7322 " primary node (%s)" % source_node)
7323 demoted_node = target_node
7325 if instance.disk_template in constants.DTS_INT_MIRROR:
7326 self._EnsureSecondary(demoted_node)
7328 self._WaitUntilSync()
7329 except errors.OpExecError:
7330 # we ignore here errors, since if the device is standalone, it
7331 # won't be able to sync
7333 self._GoStandalone()
7334 self._GoReconnect(False)
7335 self._WaitUntilSync()
7337 self.feedback_fn("* done")
7339 def _RevertDiskStatus(self):
7340 """Try to revert the disk status after a failed migration.
7343 target_node = self.target_node
7344 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7348 self._EnsureSecondary(target_node)
7349 self._GoStandalone()
7350 self._GoReconnect(False)
7351 self._WaitUntilSync()
7352 except errors.OpExecError, err:
7353 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7354 " please try to recover the instance manually;"
7355 " error '%s'" % str(err))
7357 def _AbortMigration(self):
7358 """Call the hypervisor code to abort a started migration.
7361 instance = self.instance
7362 target_node = self.target_node
7363 migration_info = self.migration_info
7365 abort_result = self.rpc.call_finalize_migration(target_node,
7369 abort_msg = abort_result.fail_msg
7371 logging.error("Aborting migration failed on target node %s: %s",
7372 target_node, abort_msg)
7373 # Don't raise an exception here, as we still have to try to revert the
7374 # disk status, even if this step failed.
7376 def _ExecMigration(self):
7377 """Migrate an instance.
7379 The migrate is done by:
7380 - change the disks into dual-master mode
7381 - wait until disks are fully synchronized again
7382 - migrate the instance
7383 - change disks on the new secondary node (the old primary) to secondary
7384 - wait until disks are fully synchronized
7385 - change disks into single-master mode
7388 instance = self.instance
7389 target_node = self.target_node
7390 source_node = self.source_node
7392 self.feedback_fn("* checking disk consistency between source and target")
7393 for dev in instance.disks:
7394 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7395 raise errors.OpExecError("Disk %s is degraded or not fully"
7396 " synchronized on target node,"
7397 " aborting migration" % dev.iv_name)
7399 # First get the migration information from the remote node
7400 result = self.rpc.call_migration_info(source_node, instance)
7401 msg = result.fail_msg
7403 log_err = ("Failed fetching source migration information from %s: %s" %
7405 logging.error(log_err)
7406 raise errors.OpExecError(log_err)
7408 self.migration_info = migration_info = result.payload
7410 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7411 # Then switch the disks to master/master mode
7412 self._EnsureSecondary(target_node)
7413 self._GoStandalone()
7414 self._GoReconnect(True)
7415 self._WaitUntilSync()
7417 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7418 result = self.rpc.call_accept_instance(target_node,
7421 self.nodes_ip[target_node])
7423 msg = result.fail_msg
7425 logging.error("Instance pre-migration failed, trying to revert"
7426 " disk status: %s", msg)
7427 self.feedback_fn("Pre-migration failed, aborting")
7428 self._AbortMigration()
7429 self._RevertDiskStatus()
7430 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7431 (instance.name, msg))
7433 self.feedback_fn("* migrating instance to %s" % target_node)
7434 result = self.rpc.call_instance_migrate(source_node, instance,
7435 self.nodes_ip[target_node],
7437 msg = result.fail_msg
7439 logging.error("Instance migration failed, trying to revert"
7440 " disk status: %s", msg)
7441 self.feedback_fn("Migration failed, aborting")
7442 self._AbortMigration()
7443 self._RevertDiskStatus()
7444 raise errors.OpExecError("Could not migrate instance %s: %s" %
7445 (instance.name, msg))
7447 instance.primary_node = target_node
7448 # distribute new instance config to the other nodes
7449 self.cfg.Update(instance, self.feedback_fn)
7451 result = self.rpc.call_finalize_migration(target_node,
7455 msg = result.fail_msg
7457 logging.error("Instance migration succeeded, but finalization failed:"
7459 raise errors.OpExecError("Could not finalize instance migration: %s" %
7462 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7463 self._EnsureSecondary(source_node)
7464 self._WaitUntilSync()
7465 self._GoStandalone()
7466 self._GoReconnect(False)
7467 self._WaitUntilSync()
7469 self.feedback_fn("* done")
7471 def _ExecFailover(self):
7472 """Failover an instance.
7474 The failover is done by shutting it down on its present node and
7475 starting it on the secondary.
7478 instance = self.instance
7479 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7481 source_node = instance.primary_node
7482 target_node = self.target_node
7484 if instance.admin_up:
7485 self.feedback_fn("* checking disk consistency between source and target")
7486 for dev in instance.disks:
7487 # for drbd, these are drbd over lvm
7488 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7489 if primary_node.offline:
7490 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7492 (primary_node.name, dev.iv_name, target_node))
7493 elif not self.ignore_consistency:
7494 raise errors.OpExecError("Disk %s is degraded on target node,"
7495 " aborting failover" % dev.iv_name)
7497 self.feedback_fn("* not checking disk consistency as instance is not"
7500 self.feedback_fn("* shutting down instance on source node")
7501 logging.info("Shutting down instance %s on node %s",
7502 instance.name, source_node)
7504 result = self.rpc.call_instance_shutdown(source_node, instance,
7505 self.shutdown_timeout)
7506 msg = result.fail_msg
7508 if self.ignore_consistency or primary_node.offline:
7509 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7510 " proceeding anyway; please make sure node"
7511 " %s is down; error details: %s",
7512 instance.name, source_node, source_node, msg)
7514 raise errors.OpExecError("Could not shutdown instance %s on"
7516 (instance.name, source_node, msg))
7518 self.feedback_fn("* deactivating the instance's disks on source node")
7519 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7520 raise errors.OpExecError("Can't shut down the instance's disks")
7522 instance.primary_node = target_node
7523 # distribute new instance config to the other nodes
7524 self.cfg.Update(instance, self.feedback_fn)
7526 # Only start the instance if it's marked as up
7527 if instance.admin_up:
7528 self.feedback_fn("* activating the instance's disks on target node %s" %
7530 logging.info("Starting instance %s on node %s",
7531 instance.name, target_node)
7533 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7534 ignore_secondaries=True)
7536 _ShutdownInstanceDisks(self.lu, instance)
7537 raise errors.OpExecError("Can't activate the instance's disks")
7539 self.feedback_fn("* starting the instance on the target node %s" %
7541 result = self.rpc.call_instance_start(target_node, instance, None, None,
7543 msg = result.fail_msg
7545 _ShutdownInstanceDisks(self.lu, instance)
7546 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7547 (instance.name, target_node, msg))
7549 def Exec(self, feedback_fn):
7550 """Perform the migration.
7553 self.feedback_fn = feedback_fn
7554 self.source_node = self.instance.primary_node
7556 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7557 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7558 self.target_node = self.instance.secondary_nodes[0]
7559 # Otherwise self.target_node has been populated either
7560 # directly, or through an iallocator.
7562 self.all_nodes = [self.source_node, self.target_node]
7563 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7564 in self.cfg.GetMultiNodeInfo(self.all_nodes))
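# nodes_ip maps each involved node to its secondary IP, which is what the
# migration and DRBD RPCs above use to address the peer node.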
7567 feedback_fn("Failover instance %s" % self.instance.name)
7568 self._ExecFailover()
7570 feedback_fn("Migrating instance %s" % self.instance.name)
7573 return self._ExecCleanup()
7575 return self._ExecMigration()
7578 def _CreateBlockDev(lu, node, instance, device, force_create,
7580 """Create a tree of block devices on a given node.
7582 If this device type has to be created on secondaries, create it and
7585 If not, just recurse to children keeping the same 'force' value.
7587 @param lu: the lu on whose behalf we execute
7588 @param node: the node on which to create the device
7589 @type instance: L{objects.Instance}
7590 @param instance: the instance which owns the device
7591 @type device: L{objects.Disk}
7592 @param device: the device to create
7593 @type force_create: boolean
7594 @param force_create: whether to force creation of this device; this
7595 will be changed to True whenever we find a device which has
7596 CreateOnSecondary() attribute
7597 @param info: the extra 'metadata' we should attach to the device
7598 (this will be represented as a LVM tag)
7599 @type force_open: boolean
7600 @param force_open: this parameter will be passed to the
7601 L{backend.BlockdevCreate} function where it specifies
7602 whether we run on primary or not, and it affects both
7603 the child assembly and the device's own Open() execution
7606 if device.CreateOnSecondary():
7610 for child in device.children:
7611 _CreateBlockDev(lu, node, instance, child, force_create,
7614 if not force_create:
7617 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
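# Note (added comment): force_create propagates downwards -- once a device
# reports CreateOnSecondary(), it and its children are created on the given
# node even if the caller passed force_create=False. For example, with DRBD
# over LVM the mirrored devices are typically created on the secondary node as
# well for exactly this reason.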
7620 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7621 """Create a single block device on a given node.
7623 This will not recurse over children of the device, so they must be
7626 @param lu: the lu on whose behalf we execute
7627 @param node: the node on which to create the device
7628 @type instance: L{objects.Instance}
7629 @param instance: the instance which owns the device
7630 @type device: L{objects.Disk}
7631 @param device: the device to create
7632 @param info: the extra 'metadata' we should attach to the device
7633 (this will be represented as a LVM tag)
7634 @type force_open: boolean
7635 @param force_open: this parameter will be passed to the
7636 L{backend.BlockdevCreate} function where it specifies
7637 whether we run on primary or not, and it affects both
7638 the child assembly and the device's own Open() execution
7641 lu.cfg.SetDiskID(device, node)
7642 result = lu.rpc.call_blockdev_create(node, device, device.size,
7643 instance.name, force_open, info)
7644 result.Raise("Can't create block device %s on"
7645 " node %s for instance %s" % (device, node, instance.name))
7646 if device.physical_id is None:
7647 device.physical_id = result.payload
7650 def _GenerateUniqueNames(lu, exts):
7651 """Generate a suitable LV name.
7653 This will generate a logical volume name for the given instance.
7658 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7659 results.append("%s%s" % (new_id, val))
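# Illustrative sketch (added, not part of the original code): for exts such as
# [".disk0", ".disk1"] this returns names like
# ["<uuid1>.disk0", "<uuid2>.disk1"], with a fresh unique ID generated for
# every extension.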
7663 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7664 iv_name, p_minor, s_minor):
7665 """Generate a drbd8 device complete with its children.
7668 assert len(vgnames) == len(names) == 2
7669 port = lu.cfg.AllocatePort()
7670 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7671 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7672 logical_id=(vgnames[0], names[0]))
7673 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7674 logical_id=(vgnames[1], names[1]))
7675 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7676 logical_id=(primary, secondary, port,
7679 children=[dev_data, dev_meta],
7684 def _GenerateDiskTemplate(lu, template_name,
7685 instance_name, primary_node,
7686 secondary_nodes, disk_info,
7687 file_storage_dir, file_driver,
7688 base_index, feedback_fn):
7689 """Generate the entire disk layout for a given template type.
7692 #TODO: compute space requirements
7694 vgname = lu.cfg.GetVGName()
7695 disk_count = len(disk_info)
7697 if template_name == constants.DT_DISKLESS:
7699 elif template_name == constants.DT_PLAIN:
7700 if len(secondary_nodes) != 0:
7701 raise errors.ProgrammerError("Wrong template configuration")
7703 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7704 for i in range(disk_count)])
7705 for idx, disk in enumerate(disk_info):
7706 disk_index = idx + base_index
7707 vg = disk.get(constants.IDISK_VG, vgname)
7708 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7709 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7710 size=disk[constants.IDISK_SIZE],
7711 logical_id=(vg, names[idx]),
7712 iv_name="disk/%d" % disk_index,
7713 mode=disk[constants.IDISK_MODE])
7714 disks.append(disk_dev)
7715 elif template_name == constants.DT_DRBD8:
7716 if len(secondary_nodes) != 1:
7717 raise errors.ProgrammerError("Wrong template configuration")
7718 remote_node = secondary_nodes[0]
7719 minors = lu.cfg.AllocateDRBDMinor(
7720 [primary_node, remote_node] * len(disk_info), instance_name)
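# Added note: the minors are requested as [primary, secondary, primary,
# secondary, ...], one pair per disk, so below minors[idx * 2] is the primary
# node's minor and minors[idx * 2 + 1] the secondary's for disk number idx.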
7723 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7724 for i in range(disk_count)]):
7725 names.append(lv_prefix + "_data")
7726 names.append(lv_prefix + "_meta")
7727 for idx, disk in enumerate(disk_info):
7728 disk_index = idx + base_index
7729 data_vg = disk.get(constants.IDISK_VG, vgname)
7730 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7731 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7732 disk[constants.IDISK_SIZE],
7734 names[idx * 2:idx * 2 + 2],
7735 "disk/%d" % disk_index,
7736 minors[idx * 2], minors[idx * 2 + 1])
7737 disk_dev.mode = disk[constants.IDISK_MODE]
7738 disks.append(disk_dev)
7739 elif template_name == constants.DT_FILE:
7740 if len(secondary_nodes) != 0:
7741 raise errors.ProgrammerError("Wrong template configuration")
7743 opcodes.RequireFileStorage()
7745 for idx, disk in enumerate(disk_info):
7746 disk_index = idx + base_index
7747 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7748 size=disk[constants.IDISK_SIZE],
7749 iv_name="disk/%d" % disk_index,
7750 logical_id=(file_driver,
7751 "%s/disk%d" % (file_storage_dir,
7753 mode=disk[constants.IDISK_MODE])
7754 disks.append(disk_dev)
7755 elif template_name == constants.DT_SHARED_FILE:
7756 if len(secondary_nodes) != 0:
7757 raise errors.ProgrammerError("Wrong template configuration")
7759 opcodes.RequireSharedFileStorage()
7761 for idx, disk in enumerate(disk_info):
7762 disk_index = idx + base_index
7763 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7764 size=disk[constants.IDISK_SIZE],
7765 iv_name="disk/%d" % disk_index,
7766 logical_id=(file_driver,
7767 "%s/disk%d" % (file_storage_dir,
7769 mode=disk[constants.IDISK_MODE])
7770 disks.append(disk_dev)
7771 elif template_name == constants.DT_BLOCK:
7772 if len(secondary_nodes) != 0:
7773 raise errors.ProgrammerError("Wrong template configuration")
7775 for idx, disk in enumerate(disk_info):
7776 disk_index = idx + base_index
7777 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7778 size=disk[constants.IDISK_SIZE],
7779 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7780 disk[constants.IDISK_ADOPT]),
7781 iv_name="disk/%d" % disk_index,
7782 mode=disk[constants.IDISK_MODE])
7783 disks.append(disk_dev)
7786 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7790 def _GetInstanceInfoText(instance):
7791 """Compute that text that should be added to the disk's metadata.
7794 return "originstname+%s" % instance.name
7797 def _CalcEta(time_taken, written, total_size):
7798 """Calculates the ETA based on size written and total size.
7800 @param time_taken: The time taken so far
7801 @param written: amount written so far
7802 @param total_size: The total size of data to be written
7803 @return: The remaining time in seconds
7806 avg_time = time_taken / float(written)
7807 return (total_size - written) * avg_time
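# Worked example (added for clarity): _CalcEta(30.0, 1024, 4096) assumes the
# observed rate stays constant, so the remaining 4096 - 1024 units take
# (4096 - 1024) * (30.0 / 1024) = 90.0 seconds.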
7810 def _WipeDisks(lu, instance):
7811 """Wipes instance disks.
7813 @type lu: L{LogicalUnit}
7814 @param lu: the logical unit on whose behalf we execute
7815 @type instance: L{objects.Instance}
7816 @param instance: the instance whose disks we should wipe
7817 @return: the success of the wipe
7820 node = instance.primary_node
7822 for device in instance.disks:
7823 lu.cfg.SetDiskID(device, node)
7825 logging.info("Pause sync of instance %s disks", instance.name)
7826 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7828 for idx, success in enumerate(result.payload):
7830 logging.warn("pause-sync of instance %s for disks %d failed",
7834 for idx, device in enumerate(instance.disks):
7835 # The wipe size is MIN_WIPE_CHUNK_PERCENT of the instance disk size,
7836 # capped at MAX_WIPE_CHUNK
7837 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7838 constants.MIN_WIPE_CHUNK_PERCENT)
7839 # we _must_ make this an int, otherwise rounding errors will occur
7841 wipe_chunk_size = int(wipe_chunk_size)
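# Example (added, assuming the usual 10% minimum chunk percentage): a
# 20480 MiB disk yields min(constants.MAX_WIPE_CHUNK, 2048) MiB per wipe
# request, so large disks are wiped in several bounded-size chunks with
# progress reported below.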
7843 lu.LogInfo("* Wiping disk %d", idx)
7844 logging.info("Wiping disk %d for instance %s, node %s using"
7845 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7850 start_time = time.time()
7852 while offset < size:
7853 wipe_size = min(wipe_chunk_size, size - offset)
7854 logging.debug("Wiping disk %d, offset %s, chunk %s",
7855 idx, offset, wipe_size)
7856 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7857 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7858 (idx, offset, wipe_size))
7861 if now - last_output >= 60:
7862 eta = _CalcEta(now - start_time, offset, size)
7863 lu.LogInfo(" - done: %.1f%% ETA: %s" %
7864 (offset / float(size) * 100, utils.FormatSeconds(eta)))
7867 logging.info("Resume sync of instance %s disks", instance.name)
7869 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7871 for idx, success in enumerate(result.payload):
7873 lu.LogWarning("Resume sync of disk %d failed, please have a"
7874 " look at the status and troubleshoot the issue", idx)
7875 logging.warn("resume-sync of instance %s for disks %d failed",
7879 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7880 """Create all disks for an instance.
7882 This abstracts away some work from AddInstance.
7884 @type lu: L{LogicalUnit}
7885 @param lu: the logical unit on whose behalf we execute
7886 @type instance: L{objects.Instance}
7887 @param instance: the instance whose disks we should create
7889 @param to_skip: list of indices to skip
7890 @type target_node: string
7891 @param target_node: if passed, overrides the target node for creation
7893 @return: the success of the creation
7896 info = _GetInstanceInfoText(instance)
7897 if target_node is None:
7898 pnode = instance.primary_node
7899 all_nodes = instance.all_nodes
7904 if instance.disk_template in constants.DTS_FILEBASED:
7905 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7906 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7908 result.Raise("Failed to create directory '%s' on"
7909 " node %s" % (file_storage_dir, pnode))
7911 # Note: this needs to be kept in sync with adding of disks in
7912 # LUInstanceSetParams
7913 for idx, device in enumerate(instance.disks):
7914 if to_skip and idx in to_skip:
7916 logging.info("Creating volume %s for instance %s",
7917 device.iv_name, instance.name)
7919 for node in all_nodes:
7920 f_create = node == pnode
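# Added note: only the primary node gets force_create/force_open set to True
# here; secondary nodes rely on _CreateBlockDev turning force_create on for
# devices that report CreateOnSecondary().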
7921 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7924 def _RemoveDisks(lu, instance, target_node=None):
7925 """Remove all disks for an instance.
7927 This abstracts away some work from `AddInstance()` and
7928 `RemoveInstance()`. Note that in case some of the devices couldn't
7929 be removed, the removal will continue with the other ones (compare
7930 with `_CreateDisks()`).
7932 @type lu: L{LogicalUnit}
7933 @param lu: the logical unit on whose behalf we execute
7934 @type instance: L{objects.Instance}
7935 @param instance: the instance whose disks we should remove
7936 @type target_node: string
7937 @param target_node: used to override the node on which to remove the disks
7939 @return: the success of the removal
7942 logging.info("Removing block devices for instance %s", instance.name)
7945 for device in instance.disks:
7947 edata = [(target_node, device)]
7949 edata = device.ComputeNodeTree(instance.primary_node)
7950 for node, disk in edata:
7951 lu.cfg.SetDiskID(disk, node)
7952 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7954 lu.LogWarning("Could not remove block device %s on node %s,"
7955 " continuing anyway: %s", device.iv_name, node, msg)
7958 if instance.disk_template == constants.DT_FILE:
7959 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7963 tgt = instance.primary_node
7964 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7966 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7967 file_storage_dir, instance.primary_node, result.fail_msg)
7973 def _ComputeDiskSizePerVG(disk_template, disks):
7974 """Compute disk size requirements in the volume group
7977 def _compute(disks, payload):
7978 """Universal algorithm.
7983 vgs[disk[constants.IDISK_VG]] = \
7984 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
7988 # Required free disk space as a function of disk and swap space
7990 constants.DT_DISKLESS: {},
7991 constants.DT_PLAIN: _compute(disks, 0),
7992 # 128 MB are added for drbd metadata for each disk
7993 constants.DT_DRBD8: _compute(disks, 128),
7994 constants.DT_FILE: {},
7995 constants.DT_SHARED_FILE: {},
7998 if disk_template not in req_size_dict:
7999 raise errors.ProgrammerError("Disk template '%s' size requirement"
8000 " is unknown" % disk_template)
8002 return req_size_dict[disk_template]
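# Illustrative example (added): with the per-VG accumulation above, two DRBD
# disks of 1024 and 2048 MiB in volume group "xenvg" add up to
# {"xenvg": 1024 + 2048 + 2 * 128} = {"xenvg": 3328}, i.e. the disk sizes plus
# 128 MiB of DRBD metadata per disk.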
8005 def _ComputeDiskSize(disk_template, disks):
8006 """Compute disk size requirements in the volume group
8009 # Required free disk space as a function of disk and swap space
8011 constants.DT_DISKLESS: None,
8012 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8013 # 128 MB are added for drbd metadata for each disk
8014 constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
8015 constants.DT_FILE: None,
8016 constants.DT_SHARED_FILE: 0,
8017 constants.DT_BLOCK: 0,
8020 if disk_template not in req_size_dict:
8021 raise errors.ProgrammerError("Disk template '%s' size requirement"
8022 " is unknown" % disk_template)
8024 return req_size_dict[disk_template]
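# Illustrative example (added): _ComputeDiskSize(constants.DT_DRBD8,
# [{constants.IDISK_SIZE: 1024}, {constants.IDISK_SIZE: 2048}]) evaluates to
# 1024 + 128 + 2048 + 128 = 3328 (MiB), while the file-based templates need no
# space in the volume group.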
8027 def _FilterVmNodes(lu, nodenames):
8028 """Filters out non-vm_capable nodes from a list.
8030 @type lu: L{LogicalUnit}
8031 @param lu: the logical unit for which we check
8032 @type nodenames: list
8033 @param nodenames: the list of nodes on which we should check
8035 @return: the list of vm-capable nodes
8038 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8039 return [name for name in nodenames if name not in non_vm_nodes]
8042 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8043 """Hypervisor parameter validation.
8045 This function abstracts the hypervisor parameter validation to be
8046 used in both instance create and instance modify.
8048 @type lu: L{LogicalUnit}
8049 @param lu: the logical unit for which we check
8050 @type nodenames: list
8051 @param nodenames: the list of nodes on which we should check
8052 @type hvname: string
8053 @param hvname: the name of the hypervisor we should use
8054 @type hvparams: dict
8055 @param hvparams: the parameters which we need to check
8056 @raise errors.OpPrereqError: if the parameters are not valid
8059 nodenames = _FilterVmNodes(lu, nodenames)
8060 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8063 for node in nodenames:
8067 info.Raise("Hypervisor parameter validation failed on node %s" % node)
8070 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8071 """OS parameters validation.
8073 @type lu: L{LogicalUnit}
8074 @param lu: the logical unit for which we check
8075 @type required: boolean
8076 @param required: whether the validation should fail if the OS is not found
8078 @type nodenames: list
8079 @param nodenames: the list of nodes on which we should check
8080 @type osname: string
8081 @param osname: the name of the OS we should check
8082 @type osparams: dict
8083 @param osparams: the parameters which we need to check
8084 @raise errors.OpPrereqError: if the parameters are not valid
8087 nodenames = _FilterVmNodes(lu, nodenames)
8088 result = lu.rpc.call_os_validate(required, nodenames, osname,
8089 [constants.OS_VALIDATE_PARAMETERS],
8091 for node, nres in result.items():
8092 # we don't check for offline cases since this should be run only
8093 # against the master node and/or an instance's nodes
8094 nres.Raise("OS Parameters validation failed on node %s" % node)
8095 if not nres.payload:
8096 lu.LogInfo("OS %s not found on node %s, validation skipped",
8100 class LUInstanceCreate(LogicalUnit):
8101 """Create an instance.
8104 HPATH = "instance-add"
8105 HTYPE = constants.HTYPE_INSTANCE
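# Typical entry point (illustrative, added comment): this LU backs invocations
# such as
#   gnt-instance add -t drbd -o <os-name> \
#     -n node1.example.com:node2.example.com -s 10G inst1.example.com
# where the OS name and node names obviously depend on the cluster.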
8108 def CheckArguments(self):
8112 # do not require name_check to ease forward/backward compatibility
8114 if self.op.no_install and self.op.start:
8115 self.LogInfo("No-installation mode selected, disabling startup")
8116 self.op.start = False
8117 # validate/normalize the instance name
8118 self.op.instance_name = \
8119 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8121 if self.op.ip_check and not self.op.name_check:
8122 # TODO: make the ip check more flexible and not depend on the name check
8123 raise errors.OpPrereqError("Cannot do IP address check without a name"
8124 " check", errors.ECODE_INVAL)
8126 # check nics' parameter names
8127 for nic in self.op.nics:
8128 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8130 # check disks. parameter names and consistent adopt/no-adopt strategy
8131 has_adopt = has_no_adopt = False
8132 for disk in self.op.disks:
8133 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8134 if constants.IDISK_ADOPT in disk:
8138 if has_adopt and has_no_adopt:
8139 raise errors.OpPrereqError("Either all disks are adopted or none is",
8142 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8143 raise errors.OpPrereqError("Disk adoption is not supported for the"
8144 " '%s' disk template" %
8145 self.op.disk_template,
8147 if self.op.iallocator is not None:
8148 raise errors.OpPrereqError("Disk adoption not allowed with an"
8149 " iallocator script", errors.ECODE_INVAL)
8150 if self.op.mode == constants.INSTANCE_IMPORT:
8151 raise errors.OpPrereqError("Disk adoption not allowed for"
8152 " instance import", errors.ECODE_INVAL)
8154 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8155 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8156 " but no 'adopt' parameter given" %
8157 self.op.disk_template,
8160 self.adopt_disks = has_adopt
8162 # instance name verification
8163 if self.op.name_check:
8164 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8165 self.op.instance_name = self.hostname1.name
8166 # used in CheckPrereq for ip ping check
8167 self.check_ip = self.hostname1.ip
8169 self.check_ip = None
8171 # file storage checks
8172 if (self.op.file_driver and
8173 not self.op.file_driver in constants.FILE_DRIVER):
8174 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8175 self.op.file_driver, errors.ECODE_INVAL)
8177 if self.op.disk_template == constants.DT_FILE:
8178 opcodes.RequireFileStorage()
8179 elif self.op.disk_template == constants.DT_SHARED_FILE:
8180 opcodes.RequireSharedFileStorage()
8182 ### Node/iallocator related checks
8183 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8185 if self.op.pnode is not None:
8186 if self.op.disk_template in constants.DTS_INT_MIRROR:
8187 if self.op.snode is None:
8188 raise errors.OpPrereqError("The networked disk templates need"
8189 " a mirror node", errors.ECODE_INVAL)
8191 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8193 self.op.snode = None
8195 self._cds = _GetClusterDomainSecret()
8197 if self.op.mode == constants.INSTANCE_IMPORT:
8198 # On import force_variant must be True, because if we forced it at
8199 # initial install, our only chance when importing it back is that it
8201 self.op.force_variant = True
8203 if self.op.no_install:
8204 self.LogInfo("No-installation mode has no effect during import")
8206 elif self.op.mode == constants.INSTANCE_CREATE:
8207 if self.op.os_type is None:
8208 raise errors.OpPrereqError("No guest OS specified",
8210 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8211 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8212 " installation" % self.op.os_type,
8214 if self.op.disk_template is None:
8215 raise errors.OpPrereqError("No disk template specified",
8218 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8219 # Check handshake to ensure both clusters have the same domain secret
8220 src_handshake = self.op.source_handshake
8221 if not src_handshake:
8222 raise errors.OpPrereqError("Missing source handshake",
8225 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8228 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8231 # Load and check source CA
8232 self.source_x509_ca_pem = self.op.source_x509_ca
8233 if not self.source_x509_ca_pem:
8234 raise errors.OpPrereqError("Missing source X509 CA",
8238 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8240 except OpenSSL.crypto.Error, err:
8241 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8242 (err, ), errors.ECODE_INVAL)
8244 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8245 if errcode is not None:
8246 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8249 self.source_x509_ca = cert
8251 src_instance_name = self.op.source_instance_name
8252 if not src_instance_name:
8253 raise errors.OpPrereqError("Missing source instance name",
8256 self.source_instance_name = \
8257 netutils.GetHostname(name=src_instance_name).name
8260 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8261 self.op.mode, errors.ECODE_INVAL)
8263 def ExpandNames(self):
8264 """ExpandNames for CreateInstance.
8266 Figure out the right locks for instance creation.
8269 self.needed_locks = {}
8271 instance_name = self.op.instance_name
8272 # this is just a preventive check, but someone might still add this
8273 # instance in the meantime, and creation will fail at lock-add time
8274 if instance_name in self.cfg.GetInstanceList():
8275 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8276 instance_name, errors.ECODE_EXISTS)
8278 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8280 if self.op.iallocator:
8281 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8283 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8284 nodelist = [self.op.pnode]
8285 if self.op.snode is not None:
8286 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8287 nodelist.append(self.op.snode)
8288 self.needed_locks[locking.LEVEL_NODE] = nodelist
8290 # in case of import lock the source node too
8291 if self.op.mode == constants.INSTANCE_IMPORT:
8292 src_node = self.op.src_node
8293 src_path = self.op.src_path
8295 if src_path is None:
8296 self.op.src_path = src_path = self.op.instance_name
8298 if src_node is None:
8299 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8300 self.op.src_node = None
8301 if os.path.isabs(src_path):
8302 raise errors.OpPrereqError("Importing an instance from a path"
8303 " requires a source node option",
8306 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8307 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8308 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8309 if not os.path.isabs(src_path):
8310 self.op.src_path = src_path = \
8311 utils.PathJoin(constants.EXPORT_DIR, src_path)
8313 def _RunAllocator(self):
8314 """Run the allocator based on input opcode.
8317 nics = [n.ToDict() for n in self.nics]
8318 ial = IAllocator(self.cfg, self.rpc,
8319 mode=constants.IALLOCATOR_MODE_ALLOC,
8320 name=self.op.instance_name,
8321 disk_template=self.op.disk_template,
8324 vcpus=self.be_full[constants.BE_VCPUS],
8325 memory=self.be_full[constants.BE_MEMORY],
8328 hypervisor=self.op.hypervisor,
8331 ial.Run(self.op.iallocator)
8334 raise errors.OpPrereqError("Can't compute nodes using"
8335 " iallocator '%s': %s" %
8336 (self.op.iallocator, ial.info),
8338 if len(ial.result) != ial.required_nodes:
8339 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8340 " of nodes (%s), required %s" %
8341 (self.op.iallocator, len(ial.result),
8342 ial.required_nodes), errors.ECODE_FAULT)
8343 self.op.pnode = ial.result[0]
8344 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8345 self.op.instance_name, self.op.iallocator,
8346 utils.CommaJoin(ial.result))
8347 if ial.required_nodes == 2:
8348 self.op.snode = ial.result[1]
8350 def BuildHooksEnv(self):
8353 This runs on master, primary and secondary nodes of the instance.
8357 "ADD_MODE": self.op.mode,
8359 if self.op.mode == constants.INSTANCE_IMPORT:
8360 env["SRC_NODE"] = self.op.src_node
8361 env["SRC_PATH"] = self.op.src_path
8362 env["SRC_IMAGES"] = self.src_images
8364 env.update(_BuildInstanceHookEnv(
8365 name=self.op.instance_name,
8366 primary_node=self.op.pnode,
8367 secondary_nodes=self.secondaries,
8368 status=self.op.start,
8369 os_type=self.op.os_type,
8370 memory=self.be_full[constants.BE_MEMORY],
8371 vcpus=self.be_full[constants.BE_VCPUS],
8372 nics=_NICListToTuple(self, self.nics),
8373 disk_template=self.op.disk_template,
8374 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8375 for d in self.disks],
8378 hypervisor_name=self.op.hypervisor,
8384 def BuildHooksNodes(self):
8385 """Build hooks nodes.
8388 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8391 def _ReadExportInfo(self):
8392 """Reads the export information from disk.
8394 It will override the opcode source node and path with the actual
8395 information, if these two were not specified before.
8397 @return: the export information
8400 assert self.op.mode == constants.INSTANCE_IMPORT
8402 src_node = self.op.src_node
8403 src_path = self.op.src_path
8405 if src_node is None:
8406 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8407 exp_list = self.rpc.call_export_list(locked_nodes)
8409 for node in exp_list:
8410 if exp_list[node].fail_msg:
8412 if src_path in exp_list[node].payload:
8414 self.op.src_node = src_node = node
8415 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8419 raise errors.OpPrereqError("No export found for relative path %s" %
8420 src_path, errors.ECODE_INVAL)
8422 _CheckNodeOnline(self, src_node)
8423 result = self.rpc.call_export_info(src_node, src_path)
8424 result.Raise("No export or invalid export found in dir %s" % src_path)
8426 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8427 if not export_info.has_section(constants.INISECT_EXP):
8428 raise errors.ProgrammerError("Corrupted export config",
8429 errors.ECODE_ENVIRON)
8431 ei_version = export_info.get(constants.INISECT_EXP, "version")
8432 if (int(ei_version) != constants.EXPORT_VERSION):
8433 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8434 (ei_version, constants.EXPORT_VERSION),
8435 errors.ECODE_ENVIRON)
8438 def _ReadExportParams(self, einfo):
8439 """Use export parameters as defaults.
8441 In case the opcode doesn't override some instance parameters, try to use
8442 them from the export information, if available.
8446 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8448 if self.op.disk_template is None:
8449 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8450 self.op.disk_template = einfo.get(constants.INISECT_INS,
8453 raise errors.OpPrereqError("No disk template specified and the export"
8454 " is missing the disk_template information",
8457 if not self.op.disks:
8458 if einfo.has_option(constants.INISECT_INS, "disk_count"):
8460 # TODO: import the disk iv_name too
8461 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8462 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8463 disks.append({constants.IDISK_SIZE: disk_sz})
8464 self.op.disks = disks
8466 raise errors.OpPrereqError("No disk info specified and the export"
8467 " is missing the disk information",
8470 if (not self.op.nics and
8471 einfo.has_option(constants.INISECT_INS, "nic_count")):
8473 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8475 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8476 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8481 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8482 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8484 if (self.op.hypervisor is None and
8485 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8486 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8488 if einfo.has_section(constants.INISECT_HYP):
8489 # use the export parameters but do not override the ones
8490 # specified by the user
8491 for name, value in einfo.items(constants.INISECT_HYP):
8492 if name not in self.op.hvparams:
8493 self.op.hvparams[name] = value
8495 if einfo.has_section(constants.INISECT_BEP):
8496 # use the parameters, without overriding
8497 for name, value in einfo.items(constants.INISECT_BEP):
8498 if name not in self.op.beparams:
8499 self.op.beparams[name] = value
8501 # try to read the parameters old style, from the main section
8502 for name in constants.BES_PARAMETERS:
8503 if (name not in self.op.beparams and
8504 einfo.has_option(constants.INISECT_INS, name)):
8505 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8507 if einfo.has_section(constants.INISECT_OSP):
8508 # use the parameters, without overriding
8509 for name, value in einfo.items(constants.INISECT_OSP):
8510 if name not in self.op.osparams:
8511 self.op.osparams[name] = value
8513 def _RevertToDefaults(self, cluster):
8514 """Revert the instance parameters to the default values.
8518 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8519 for name in self.op.hvparams.keys():
8520 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8521 del self.op.hvparams[name]
8523 be_defs = cluster.SimpleFillBE({})
8524 for name in self.op.beparams.keys():
8525 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8526 del self.op.beparams[name]
8528 nic_defs = cluster.SimpleFillNIC({})
8529 for nic in self.op.nics:
8530 for name in constants.NICS_PARAMETERS:
8531 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8534 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8535 for name in self.op.osparams.keys():
8536 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8537 del self.op.osparams[name]
8539 def _CalculateFileStorageDir(self):
8540 """Calculate final instance file storage dir.
8543 # file storage dir calculation/check
8544 self.instance_file_storage_dir = None
8545 if self.op.disk_template in constants.DTS_FILEBASED:
8546 # build the full file storage dir path
8549 if self.op.disk_template == constants.DT_SHARED_FILE:
8550 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8552 get_fsd_fn = self.cfg.GetFileStorageDir
8554 cfg_storagedir = get_fsd_fn()
8555 if not cfg_storagedir:
8556 raise errors.OpPrereqError("Cluster file storage dir not defined")
8557 joinargs.append(cfg_storagedir)
8559 if self.op.file_storage_dir is not None:
8560 joinargs.append(self.op.file_storage_dir)
8562 joinargs.append(self.op.instance_name)
8564 # pylint: disable=W0142
8565 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8567 def CheckPrereq(self):
8568 """Check prerequisites.
8571 self._CalculateFileStorageDir()
8573 if self.op.mode == constants.INSTANCE_IMPORT:
8574 export_info = self._ReadExportInfo()
8575 self._ReadExportParams(export_info)
8577 if (not self.cfg.GetVGName() and
8578 self.op.disk_template not in constants.DTS_NOT_LVM):
8579 raise errors.OpPrereqError("Cluster does not support lvm-based"
8580 " instances", errors.ECODE_STATE)
8582 if self.op.hypervisor is None:
8583 self.op.hypervisor = self.cfg.GetHypervisorType()
8585 cluster = self.cfg.GetClusterInfo()
8586 enabled_hvs = cluster.enabled_hypervisors
8587 if self.op.hypervisor not in enabled_hvs:
8588 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8589 " cluster (%s)" % (self.op.hypervisor,
8590 ",".join(enabled_hvs)),
8593 # Check tag validity
8594 for tag in self.op.tags:
8595 objects.TaggableObject.ValidateTag(tag)
8597 # check hypervisor parameter syntax (locally)
8598 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8599 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8601 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8602 hv_type.CheckParameterSyntax(filled_hvp)
8603 self.hv_full = filled_hvp
8604 # check that we don't specify global parameters on an instance
8605 _CheckGlobalHvParams(self.op.hvparams)
8607 # fill and remember the beparams dict
8608 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8609 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8611 # build os parameters
8612 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8614 # now that hvp/bep are in final format, let's reset to defaults,
8616 if self.op.identify_defaults:
8617 self._RevertToDefaults(cluster)
8621 for idx, nic in enumerate(self.op.nics):
8622 nic_mode_req = nic.get(constants.INIC_MODE, None)
8623 nic_mode = nic_mode_req
8624 if nic_mode is None:
8625 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8627 # in routed mode, for the first nic, the default ip is 'auto'
8628 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8629 default_ip_mode = constants.VALUE_AUTO
8631 default_ip_mode = constants.VALUE_NONE
8633 # ip validity checks
8634 ip = nic.get(constants.INIC_IP, default_ip_mode)
8635 if ip is None or ip.lower() == constants.VALUE_NONE:
8637 elif ip.lower() == constants.VALUE_AUTO:
8638 if not self.op.name_check:
8639 raise errors.OpPrereqError("IP address set to auto but name checks"
8640 " have been skipped",
8642 nic_ip = self.hostname1.ip
8644 if not netutils.IPAddress.IsValid(ip):
8645 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8649 # TODO: check the ip address for uniqueness
8650 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8651 raise errors.OpPrereqError("Routed nic mode requires an ip address",
8654 # MAC address verification
8655 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8656 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8657 mac = utils.NormalizeAndValidateMac(mac)
8660 self.cfg.ReserveMAC(mac, self.proc.GetECId())
8661 except errors.ReservationError:
8662 raise errors.OpPrereqError("MAC address %s already in use"
8663 " in cluster" % mac,
8664 errors.ECODE_NOTUNIQUE)
8666 # Build nic parameters
8667 link = nic.get(constants.INIC_LINK, None)
8670 nicparams[constants.NIC_MODE] = nic_mode_req
8672 nicparams[constants.NIC_LINK] = link
8674 check_params = cluster.SimpleFillNIC(nicparams)
8675 objects.NIC.CheckParameterSyntax(check_params)
8676 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8678 # disk checks/pre-build
8679 default_vg = self.cfg.GetVGName()
8681 for disk in self.op.disks:
8682 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8683 if mode not in constants.DISK_ACCESS_SET:
8684 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8685 mode, errors.ECODE_INVAL)
8686 size = disk.get(constants.IDISK_SIZE, None)
8688 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8691 except (TypeError, ValueError):
8692 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8695 data_vg = disk.get(constants.IDISK_VG, default_vg)
8697 constants.IDISK_SIZE: size,
8698 constants.IDISK_MODE: mode,
8699 constants.IDISK_VG: data_vg,
8700 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8702 if constants.IDISK_ADOPT in disk:
8703 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8704 self.disks.append(new_disk)
8706 if self.op.mode == constants.INSTANCE_IMPORT:
8708 # Check that the new instance doesn't have fewer disks than the export
8709 instance_disks = len(self.disks)
8710 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8711 if instance_disks < export_disks:
8712 raise errors.OpPrereqError("Not enough disks to import."
8713 " (instance: %d, export: %d)" %
8714 (instance_disks, export_disks),
8718 for idx in range(export_disks):
8719 option = "disk%d_dump" % idx
8720 if export_info.has_option(constants.INISECT_INS, option):
8721 # FIXME: are the old os-es, disk sizes, etc. useful?
8722 export_name = export_info.get(constants.INISECT_INS, option)
8723 image = utils.PathJoin(self.op.src_path, export_name)
8724 disk_images.append(image)
8726 disk_images.append(False)
8728 self.src_images = disk_images
8730 old_name = export_info.get(constants.INISECT_INS, "name")
8732 exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8733 except (TypeError, ValueError), err:
8734 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8735 " an integer: %s" % str(err),
8737 if self.op.instance_name == old_name:
8738 for idx, nic in enumerate(self.nics):
8739 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8740 nic_mac_ini = "nic%d_mac" % idx
8741 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8743 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8745 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8746 if self.op.ip_check:
8747 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8748 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8749 (self.check_ip, self.op.instance_name),
8750 errors.ECODE_NOTUNIQUE)
8752 #### mac address generation
8753 # By generating here the mac address both the allocator and the hooks get
8754 # the real final mac address rather than the 'auto' or 'generate' value.
8755 # There is a race condition between the generation and the instance object
8756 # creation, which means that we know the mac is valid now, but we're not
8757 # sure it will be when we actually add the instance. If things go bad
8758 # adding the instance will abort because of a duplicate mac, and the
8759 # creation job will fail.
8760 for nic in self.nics:
8761 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8762 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8766 if self.op.iallocator is not None:
8767 self._RunAllocator()
8769 #### node related checks
8771 # check primary node
8772 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8773 assert self.pnode is not None, \
8774 "Cannot retrieve locked node %s" % self.op.pnode
8776 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8777 pnode.name, errors.ECODE_STATE)
8779 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8780 pnode.name, errors.ECODE_STATE)
8781 if not pnode.vm_capable:
8782 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8783 " '%s'" % pnode.name, errors.ECODE_STATE)
8785 self.secondaries = []
8787 # mirror node verification
8788 if self.op.disk_template in constants.DTS_INT_MIRROR:
8789 if self.op.snode == pnode.name:
8790 raise errors.OpPrereqError("The secondary node cannot be the"
8791 " primary node", errors.ECODE_INVAL)
8792 _CheckNodeOnline(self, self.op.snode)
8793 _CheckNodeNotDrained(self, self.op.snode)
8794 _CheckNodeVmCapable(self, self.op.snode)
8795 self.secondaries.append(self.op.snode)
8797 nodenames = [pnode.name] + self.secondaries
8799 if not self.adopt_disks:
8800 # Check lv size requirements, if not adopting
8801 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8802 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8804 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8805 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8806 disk[constants.IDISK_ADOPT])
8807 for disk in self.disks])
8808 if len(all_lvs) != len(self.disks):
8809 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8811 for lv_name in all_lvs:
8813 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
8814 # to ReserveLV use the same syntax
8815 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8816 except errors.ReservationError:
8817 raise errors.OpPrereqError("LV named %s used by another instance" %
8818 lv_name, errors.ECODE_NOTUNIQUE)
8820 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8821 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8823 node_lvs = self.rpc.call_lv_list([pnode.name],
8824 vg_names.payload.keys())[pnode.name]
8825 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8826 node_lvs = node_lvs.payload
8828 delta = all_lvs.difference(node_lvs.keys())
8830 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8831 utils.CommaJoin(delta),
8833 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8835 raise errors.OpPrereqError("Online logical volumes found, cannot"
8836 " adopt: %s" % utils.CommaJoin(online_lvs),
8838 # update the size of disk based on what is found
8839 for dsk in self.disks:
8840 dsk[constants.IDISK_SIZE] = \
8841 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8842 dsk[constants.IDISK_ADOPT])][0]))
8844 elif self.op.disk_template == constants.DT_BLOCK:
8845 # Normalize and de-duplicate device paths
8846 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8847 for disk in self.disks])
8848 if len(all_disks) != len(self.disks):
8849 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8851 baddisks = [d for d in all_disks
8852 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8854 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8855 " cannot be adopted" %
8856 (", ".join(baddisks),
8857 constants.ADOPTABLE_BLOCKDEV_ROOT),
8860 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8861 list(all_disks))[pnode.name]
8862 node_disks.Raise("Cannot get block device information from node %s" %
8864 node_disks = node_disks.payload
8865 delta = all_disks.difference(node_disks.keys())
8867 raise errors.OpPrereqError("Missing block device(s): %s" %
8868 utils.CommaJoin(delta),
8870 for dsk in self.disks:
8871 dsk[constants.IDISK_SIZE] = \
8872 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8874 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8876 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8877 # check OS parameters (remotely)
8878 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8880 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8882 # memory check on primary node
8884 _CheckNodeFreeMemory(self, self.pnode.name,
8885 "creating instance %s" % self.op.instance_name,
8886 self.be_full[constants.BE_MEMORY],
8889 self.dry_run_result = list(nodenames)
8891 def Exec(self, feedback_fn):
8892 """Create and add the instance to the cluster.
8895 instance = self.op.instance_name
8896 pnode_name = self.pnode.name
8898 ht_kind = self.op.hypervisor
8899 if ht_kind in constants.HTS_REQ_PORT:
8900 network_port = self.cfg.AllocatePort()
8904 disks = _GenerateDiskTemplate(self,
8905 self.op.disk_template,
8906 instance, pnode_name,
8909 self.instance_file_storage_dir,
8910 self.op.file_driver,
8914 iobj = objects.Instance(name=instance, os=self.op.os_type,
8915 primary_node=pnode_name,
8916 nics=self.nics, disks=disks,
8917 disk_template=self.op.disk_template,
8919 network_port=network_port,
8920 beparams=self.op.beparams,
8921 hvparams=self.op.hvparams,
8922 hypervisor=self.op.hypervisor,
8923 osparams=self.op.osparams,
8927 for tag in self.op.tags:
8930 if self.adopt_disks:
8931 if self.op.disk_template == constants.DT_PLAIN:
8932 # rename LVs to the newly-generated names; we need to construct
8933 # 'fake' LV disks with the old data, plus the new unique_id
8934 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8936 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8937 rename_to.append(t_dsk.logical_id)
8938 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8939 self.cfg.SetDiskID(t_dsk, pnode_name)
8940 result = self.rpc.call_blockdev_rename(pnode_name,
8941 zip(tmp_disks, rename_to))
8942 result.Raise("Failed to rename adopted LVs")
8944 feedback_fn("* creating instance disks...")
8946 _CreateDisks(self, iobj)
8947 except errors.OpExecError:
8948 self.LogWarning("Device creation failed, reverting...")
8950 _RemoveDisks(self, iobj)
8952 self.cfg.ReleaseDRBDMinors(instance)
8955 feedback_fn("adding instance %s to cluster config" % instance)
8957 self.cfg.AddInstance(iobj, self.proc.GetECId())
8959 # Declare that we don't want to remove the instance lock anymore, as we've
8960 # added the instance to the config
8961 del self.remove_locks[locking.LEVEL_INSTANCE]
8963 if self.op.mode == constants.INSTANCE_IMPORT:
8964 # Release unused nodes
8965 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8968 _ReleaseLocks(self, locking.LEVEL_NODE)
8971 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8972 feedback_fn("* wiping instance disks...")
8974 _WipeDisks(self, iobj)
8975 except errors.OpExecError, err:
8976 logging.exception("Wiping disks failed")
8977 self.LogWarning("Wiping instance disks failed (%s)", err)
8981 # Something is already wrong with the disks, don't do anything else
8983 elif self.op.wait_for_sync:
8984 disk_abort = not _WaitForSync(self, iobj)
8985 elif iobj.disk_template in constants.DTS_INT_MIRROR:
8986 # make sure the disks are not degraded (still sync-ing is ok)
8987 feedback_fn("* checking mirrors status")
8988 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8993 _RemoveDisks(self, iobj)
8994 self.cfg.RemoveInstance(iobj.name)
8995 # Make sure the instance lock gets removed
8996 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8997 raise errors.OpExecError("There are some degraded disks for"
9000 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9001 if self.op.mode == constants.INSTANCE_CREATE:
9002 if not self.op.no_install:
9003 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9004 not self.op.wait_for_sync)
9006 feedback_fn("* pausing disk sync to install instance OS")
9007 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9009 for idx, success in enumerate(result.payload):
9011 logging.warn("pause-sync of instance %s for disk %d failed",
9014 feedback_fn("* running the instance OS create scripts...")
9015 # FIXME: pass debug option from opcode to backend
9016 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
9017 self.op.debug_level)
9019 feedback_fn("* resuming disk sync")
9020 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9022 for idx, success in enumerate(result.payload):
9024 logging.warn("resume-sync of instance %s for disk %d failed",
9027 result.Raise("Could not add os for instance %s"
9028 " on node %s" % (instance, pnode_name))
9030 elif self.op.mode == constants.INSTANCE_IMPORT:
9031 feedback_fn("* running the instance OS import scripts...")
9035 for idx, image in enumerate(self.src_images):
9039 # FIXME: pass debug option from opcode to backend
9040 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9041 constants.IEIO_FILE, (image, ),
9042 constants.IEIO_SCRIPT,
9043 (iobj.disks[idx], idx),
9045 transfers.append(dt)
9048 masterd.instance.TransferInstanceData(self, feedback_fn,
9049 self.op.src_node, pnode_name,
9050 self.pnode.secondary_ip,
9052 if not compat.all(import_result):
9053 self.LogWarning("Some disks for instance %s on node %s were not"
9054 " imported successfully" % (instance, pnode_name))
9056 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9057 feedback_fn("* preparing remote import...")
9058 # The source cluster will stop the instance before attempting to make a
9059 # connection. In some cases stopping an instance can take a long time,
9060 # hence the shutdown timeout is added to the connection timeout.
9061 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9062 self.op.source_shutdown_timeout)
9063 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9065 assert iobj.primary_node == self.pnode.name
9067 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9068 self.source_x509_ca,
9069 self._cds, timeouts)
9070 if not compat.all(disk_results):
9071 # TODO: Should the instance still be started, even if some disks
9072 # failed to import (valid for local imports, too)?
9073 self.LogWarning("Some disks for instance %s on node %s were not"
9074 " imported successfully" % (instance, pnode_name))
9076 # Run rename script on newly imported instance
9077 assert iobj.name == instance
9078 feedback_fn("Running rename script for %s" % instance)
9079 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9080 self.source_instance_name,
9081 self.op.debug_level)
9083 self.LogWarning("Failed to run rename script for %s on node"
9084 " %s: %s" % (instance, pnode_name, result.fail_msg))
9087 # also checked in the prereq part
9088 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9092 iobj.admin_up = True
9093 self.cfg.Update(iobj, feedback_fn)
9094 logging.info("Starting instance %s on node %s", instance, pnode_name)
9095 feedback_fn("* starting instance...")
9096 result = self.rpc.call_instance_start(pnode_name, iobj,
9098 result.Raise("Could not start instance")
9100 return list(iobj.all_nodes)
9103 class LUInstanceConsole(NoHooksLU):
9104 """Connect to an instance's console.
9106 This is somewhat special in that it returns the command line that
9107 you need to run on the master node in order to connect to the console.
9113 def ExpandNames(self):
9114 self._ExpandAndLockInstance()
9116 def CheckPrereq(self):
9117 """Check prerequisites.
9119 This checks that the instance is in the cluster.
9122 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9123 assert self.instance is not None, \
9124 "Cannot retrieve locked instance %s" % self.op.instance_name
9125 _CheckNodeOnline(self, self.instance.primary_node)
9127 def Exec(self, feedback_fn):
9128 """Connect to the console of an instance
9131 instance = self.instance
9132 node = instance.primary_node
9134 node_insts = self.rpc.call_instance_list([node],
9135 [instance.hypervisor])[node]
9136 node_insts.Raise("Can't get node information from %s" % node)
9138 if instance.name not in node_insts.payload:
9139 if instance.admin_up:
9140 state = constants.INSTST_ERRORDOWN
9142 state = constants.INSTST_ADMINDOWN
9143 raise errors.OpExecError("Instance %s is not running (state %s)" %
9144 (instance.name, state))
9146 logging.debug("Connecting to console of %s on %s", instance.name, node)
9148 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
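# Illustrative usage (added): "gnt-instance console inst1.example.com" ends up
# here; the returned dictionary (a serialized objects.InstanceConsole) tells
# the client how to attach to the console from the master node.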
9151 def _GetInstanceConsole(cluster, instance):
9152 """Returns console information for an instance.
9154 @type cluster: L{objects.Cluster}
9155 @type instance: L{objects.Instance}
9159 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9160 # beparams and hvparams are passed separately, to avoid editing the
9161 # instance and then saving the defaults in the instance itself.
9162 hvparams = cluster.FillHV(instance)
9163 beparams = cluster.FillBE(instance)
9164 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9166 assert console.instance == instance.name
9167 assert console.Validate()
9169 return console.ToDict()
9172 class LUInstanceReplaceDisks(LogicalUnit):
9173 """Replace the disks of an instance.
9176 HPATH = "mirrors-replace"
9177 HTYPE = constants.HTYPE_INSTANCE
9180 def CheckArguments(self):
9181 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9184 def ExpandNames(self):
9185 self._ExpandAndLockInstance()
9187 assert locking.LEVEL_NODE not in self.needed_locks
9188 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9190 assert self.op.iallocator is None or self.op.remote_node is None, \
9191 "Conflicting options"
9193 if self.op.remote_node is not None:
9194 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9196 # Warning: do not remove the locking of the new secondary here
9197 # unless DRBD8.AddChildren is changed to work in parallel;
9198 # currently it doesn't since parallel invocations of
9199 # FindUnusedMinor will conflict
9200 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9201 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9203 self.needed_locks[locking.LEVEL_NODE] = []
9204 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9206 if self.op.iallocator is not None:
9207 # iallocator will select a new node in the same group
9208 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9210 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9211 self.op.iallocator, self.op.remote_node,
9212 self.op.disks, False, self.op.early_release)
9214 self.tasklets = [self.replacer]
9216 def DeclareLocks(self, level):
9217 if level == locking.LEVEL_NODEGROUP:
9218 assert self.op.remote_node is None
9219 assert self.op.iallocator is not None
9220 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9222 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9223 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9224 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9226 elif level == locking.LEVEL_NODE:
9227 if self.op.iallocator is not None:
9228 assert self.op.remote_node is None
9229 assert not self.needed_locks[locking.LEVEL_NODE]
9231 # Lock member nodes of all locked groups
9232 self.needed_locks[locking.LEVEL_NODE] = [node_name
9233 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9234 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9236 self._LockInstancesNodes()
9238 def BuildHooksEnv(self):
9241 This runs on the master, the primary and all the secondaries.
9244 instance = self.replacer.instance
9246 "MODE": self.op.mode,
9247 "NEW_SECONDARY": self.op.remote_node,
9248 "OLD_SECONDARY": instance.secondary_nodes[0],
9250 env.update(_BuildInstanceHookEnvByObject(self, instance))
9253 def BuildHooksNodes(self):
9254 """Build hooks nodes.
9257 instance = self.replacer.instance
9259 self.cfg.GetMasterNode(),
9260 instance.primary_node,
9262 if self.op.remote_node is not None:
9263 nl.append(self.op.remote_node)
9266 def CheckPrereq(self):
9267 """Check prerequisites.
9270 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9271 self.op.iallocator is None)
9273 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9275 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9277 return LogicalUnit.CheckPrereq(self)
9280 class TLReplaceDisks(Tasklet):
9281 """Replaces disks for an instance.
9283 Note: Locking is not within the scope of this class.
9286 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9287 disks, delay_iallocator, early_release):
9288 """Initializes this class.
9291 Tasklet.__init__(self, lu)
9294 self.instance_name = instance_name
9296 self.iallocator_name = iallocator_name
9297 self.remote_node = remote_node
9299 self.delay_iallocator = delay_iallocator
9300 self.early_release = early_release
9303 self.instance = None
9304 self.new_node = None
9305 self.target_node = None
9306 self.other_node = None
9307 self.remote_node_info = None
9308 self.node_secondary_ip = None
9311 def CheckArguments(mode, remote_node, iallocator):
9312 """Helper function for users of this class.
9315 # check for valid parameter combination
9316 if mode == constants.REPLACE_DISK_CHG:
9317 if remote_node is None and iallocator is None:
9318 raise errors.OpPrereqError("When changing the secondary either an"
9319 " iallocator script must be used or the"
9320 " new node given", errors.ECODE_INVAL)
9322 if remote_node is not None and iallocator is not None:
9323 raise errors.OpPrereqError("Give either the iallocator or the new"
9324 " secondary, not both", errors.ECODE_INVAL)
9326 elif remote_node is not None or iallocator is not None:
9327 # Not replacing the secondary
9328 raise errors.OpPrereqError("The iallocator and new node options can"
9329 " only be used when changing the"
9330 " secondary node", errors.ECODE_INVAL)
9333 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9334 """Compute a new secondary node using an IAllocator.
9337 ial = IAllocator(lu.cfg, lu.rpc,
9338 mode=constants.IALLOCATOR_MODE_RELOC,
9340 relocate_from=list(relocate_from))
9342 ial.Run(iallocator_name)
9345 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9346 " %s" % (iallocator_name, ial.info),
9349 if len(ial.result) != ial.required_nodes:
9350 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9351 " of nodes (%s), required %s" %
9353 len(ial.result), ial.required_nodes),
9356 remote_node_name = ial.result[0]
9358 lu.LogInfo("Selected new secondary for instance '%s': %s",
9359 instance_name, remote_node_name)
9361 return remote_node_name
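# Minimal sketch of how this helper is meant to be called (illustrative
# only; the instance name, script name and node list are invented):
#
#   new_secondary = TLReplaceDisks._RunAllocator(
#       lu, "hail", "instance1.example.com",
#       relocate_from=["old-secondary.example.com"])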
9363 def _FindFaultyDisks(self, node_name):
9364 """Wrapper for L{_FindFaultyInstanceDisks}.
9366 """
9367 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9368 node_name, True)
9370 def _CheckDisksActivated(self, instance):
9371 """Checks if the instance disks are activated.
9373 @param instance: The instance whose disks to check
9374 @return: True if they are activated, False otherwise
9376 """
9377 nodes = instance.all_nodes
9379 for idx, dev in enumerate(instance.disks):
9380 for node in nodes:
9381 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9382 self.cfg.SetDiskID(dev, node)
9384 result = self.rpc.call_blockdev_find(node, dev)
9386 if result.offline:
9387 continue
9388 elif result.fail_msg or not result.payload:
9389 return False
9391 return True
9393 def CheckPrereq(self):
9394 """Check prerequisites.
9396 This checks that the instance is in the cluster.
9399 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9400 assert instance is not None, \
9401 "Cannot retrieve locked instance %s" % self.instance_name
9403 if instance.disk_template != constants.DT_DRBD8:
9404 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9405 " instances", errors.ECODE_INVAL)
9407 if len(instance.secondary_nodes) != 1:
9408 raise errors.OpPrereqError("The instance has a strange layout,"
9409 " expected one secondary but found %d" %
9410 len(instance.secondary_nodes),
9413 if not self.delay_iallocator:
9414 self._CheckPrereq2()
9416 def _CheckPrereq2(self):
9417 """Check prerequisites, second part.
9419 This function should always be part of CheckPrereq. It was separated and is
9420 now called from Exec because during node evacuation iallocator was only
9421 called with an unmodified cluster model, not taking planned changes into
9422 account.
9424 """
9425 instance = self.instance
9426 secondary_node = instance.secondary_nodes[0]
9428 if self.iallocator_name is None:
9429 remote_node = self.remote_node
9431 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9432 instance.name, instance.secondary_nodes)
9434 if remote_node is None:
9435 self.remote_node_info = None
9437 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9438 "Remote node '%s' is not locked" % remote_node
9440 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9441 assert self.remote_node_info is not None, \
9442 "Cannot retrieve locked node %s" % remote_node
9444 if remote_node == self.instance.primary_node:
9445 raise errors.OpPrereqError("The specified node is the primary node of"
9446 " the instance", errors.ECODE_INVAL)
9448 if remote_node == secondary_node:
9449 raise errors.OpPrereqError("The specified node is already the"
9450 " secondary node of the instance",
9453 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9454 constants.REPLACE_DISK_CHG):
9455 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9458 if self.mode == constants.REPLACE_DISK_AUTO:
9459 if not self._CheckDisksActivated(instance):
9460 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9461 " first" % self.instance_name,
9463 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9464 faulty_secondary = self._FindFaultyDisks(secondary_node)
9466 if faulty_primary and faulty_secondary:
9467 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9468 " one node and can not be repaired"
9469 " automatically" % self.instance_name,
9473 self.disks = faulty_primary
9474 self.target_node = instance.primary_node
9475 self.other_node = secondary_node
9476 check_nodes = [self.target_node, self.other_node]
9477 elif faulty_secondary:
9478 self.disks = faulty_secondary
9479 self.target_node = secondary_node
9480 self.other_node = instance.primary_node
9481 check_nodes = [self.target_node, self.other_node]
9487 # Non-automatic modes
9488 if self.mode == constants.REPLACE_DISK_PRI:
9489 self.target_node = instance.primary_node
9490 self.other_node = secondary_node
9491 check_nodes = [self.target_node, self.other_node]
9493 elif self.mode == constants.REPLACE_DISK_SEC:
9494 self.target_node = secondary_node
9495 self.other_node = instance.primary_node
9496 check_nodes = [self.target_node, self.other_node]
9498 elif self.mode == constants.REPLACE_DISK_CHG:
9499 self.new_node = remote_node
9500 self.other_node = instance.primary_node
9501 self.target_node = secondary_node
9502 check_nodes = [self.new_node, self.other_node]
9504 _CheckNodeNotDrained(self.lu, remote_node)
9505 _CheckNodeVmCapable(self.lu, remote_node)
9507 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9508 assert old_node_info is not None
9509 if old_node_info.offline and not self.early_release:
9510 # doesn't make sense to delay the release
9511 self.early_release = True
9512 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9513 " early-release mode", secondary_node)
9516 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9519 # If not specified all disks should be replaced
9521 self.disks = range(len(self.instance.disks))
9523 for node in check_nodes:
9524 _CheckNodeOnline(self.lu, node)
9526 touched_nodes = frozenset(node_name for node_name in [self.new_node,
9529 if node_name is not None)
9531 # Release unneeded node locks
9532 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9534 # Release any owned node group
9535 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9536 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9538 # Check whether disks are valid
9539 for disk_idx in self.disks:
9540 instance.FindDisk(disk_idx)
9542 # Get secondary node IP addresses
9543 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9544 in self.cfg.GetMultiNodeInfo(touched_nodes))
9546 def Exec(self, feedback_fn):
9547 """Execute disk replacement.
9549 This dispatches the disk replacement to the appropriate handler.
9552 if self.delay_iallocator:
9553 self._CheckPrereq2()
9556 # Verify owned locks before starting operation
9557 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9558 assert set(owned_nodes) == set(self.node_secondary_ip), \
9559 ("Incorrect node locks, owning %s, expected %s" %
9560 (owned_nodes, self.node_secondary_ip.keys()))
9562 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9563 assert list(owned_instances) == [self.instance_name], \
9564 "Instance '%s' not locked" % self.instance_name
9566 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9567 "Should not own any node group lock at this point"
9570 feedback_fn("No disks need replacement")
9573 feedback_fn("Replacing disk(s) %s for %s" %
9574 (utils.CommaJoin(self.disks), self.instance.name))
9576 activate_disks = (not self.instance.admin_up)
9578 # Activate the instance disks if we're replacing them on a down instance
9579 if activate_disks:
9580 _StartInstanceDisks(self.lu, self.instance, True)
9582 try:
9583 # Should we replace the secondary node?
9584 if self.new_node is not None:
9585 fn = self._ExecDrbd8Secondary
9586 else:
9587 fn = self._ExecDrbd8DiskOnly
9589 result = fn(feedback_fn)
9590 finally:
9591 # Deactivate the instance disks if we're replacing them on a
9592 # down instance
9593 if activate_disks:
9594 _SafeShutdownInstanceDisks(self.lu, self.instance)
9597 # Verify owned locks
9598 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9599 nodes = frozenset(self.node_secondary_ip)
9600 assert ((self.early_release and not owned_nodes) or
9601 (not self.early_release and not (set(owned_nodes) - nodes))), \
9602 ("Not owning the correct locks, early_release=%s, owned=%r,"
9603 " nodes=%r" % (self.early_release, owned_nodes, nodes))
9607 def _CheckVolumeGroup(self, nodes):
9608 self.lu.LogInfo("Checking volume groups")
9610 vgname = self.cfg.GetVGName()
9612 # Make sure volume group exists on all involved nodes
9613 results = self.rpc.call_vg_list(nodes)
9615 raise errors.OpExecError("Can't list volume groups on the nodes")
9619 res.Raise("Error checking node %s" % node)
9620 if vgname not in res.payload:
9621 raise errors.OpExecError("Volume group '%s' not found on node %s" %
9624 def _CheckDisksExistence(self, nodes):
9625 # Check disk existence
9626 for idx, dev in enumerate(self.instance.disks):
9627 if idx not in self.disks:
9631 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9632 self.cfg.SetDiskID(dev, node)
9634 result = self.rpc.call_blockdev_find(node, dev)
9636 msg = result.fail_msg
9637 if msg or not result.payload:
9639 msg = "disk not found"
9640 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9643 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9644 for idx, dev in enumerate(self.instance.disks):
9645 if idx not in self.disks:
9648 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9651 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9653 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9654 " replace disks for instance %s" %
9655 (node_name, self.instance.name))
9657 def _CreateNewStorage(self, node_name):
9658 """Create new storage on the primary or secondary node.
9660 This is only used for same-node replaces, not for changing the
9661 secondary node, hence we don't want to modify the existing disk.
9666 for idx, dev in enumerate(self.instance.disks):
9667 if idx not in self.disks:
9670 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9672 self.cfg.SetDiskID(dev, node_name)
9674 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9675 names = _GenerateUniqueNames(self.lu, lv_names)
9677 vg_data = dev.children[0].logical_id[0]
9678 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9679 logical_id=(vg_data, names[0]))
9680 vg_meta = dev.children[1].logical_id[0]
9681 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9682 logical_id=(vg_meta, names[1]))
9684 new_lvs = [lv_data, lv_meta]
9685 old_lvs = [child.Copy() for child in dev.children]
9686 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9688 # we pass force_create=True to force the LVM creation
9689 for new_lv in new_lvs:
9690 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9691 _GetInstanceInfoText(self.instance), False)
9693 return iv_names
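# The mapping returned above has one entry per replaced disk; a sketch of
# its shape (the variable names inside are illustrative):
#
#   iv_names = {
#     "disk/0": (drbd_disk,                  # the DRBD disk object
#                [old_data_lv, old_meta_lv],  # LVs currently attached
#                [new_data_lv, new_meta_lv]), # freshly created LVs
#   }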
9695 def _CheckDevices(self, node_name, iv_names):
9696 for name, (dev, _, _) in iv_names.iteritems():
9697 self.cfg.SetDiskID(dev, node_name)
9699 result = self.rpc.call_blockdev_find(node_name, dev)
9701 msg = result.fail_msg
9702 if msg or not result.payload:
9704 msg = "disk not found"
9705 raise errors.OpExecError("Can't find DRBD device %s: %s" %
9708 if result.payload.is_degraded:
9709 raise errors.OpExecError("DRBD device %s is degraded!" % name)
9711 def _RemoveOldStorage(self, node_name, iv_names):
9712 for name, (_, old_lvs, _) in iv_names.iteritems():
9713 self.lu.LogInfo("Remove logical volumes for %s" % name)
9715 for lv in old_lvs:
9716 self.cfg.SetDiskID(lv, node_name)
9718 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9719 if msg:
9720 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9721 hint="remove unused LVs manually")
9723 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9724 """Replace a disk on the primary or secondary for DRBD 8.
9726 The algorithm for replace is quite complicated:
9728 1. for each disk to be replaced:
9730 1. create new LVs on the target node with unique names
9731 1. detach old LVs from the drbd device
9732 1. rename old LVs to name_replaced.<time_t>
9733 1. rename new LVs to old LVs
9734 1. attach the new LVs (with the old names now) to the drbd device
9736 1. wait for sync across all devices
9738 1. for each modified disk:
9740 1. remove old LVs (which have the name name_replaced.<time_t>)
9742 Failures are not very well handled.
9744 """
9745 steps_total = 6
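# Condensed sketch of the per-disk LV swap performed below (illustration
# only; "node" and the disk objects stand for self.target_node and the
# DRBD/LV objects handled in the loop):
#
#   rpc.call_blockdev_removechildren(node, drbd_dev, old_lvs)  # detach old LVs
#   rpc.call_blockdev_rename(node, old_to_temp_names)          # park them under temp names
#   rpc.call_blockdev_rename(node, new_to_old_names)           # give new LVs the old names
#   rpc.call_blockdev_addchildren(node, drbd_dev, new_lvs)     # reattach under the old names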
9747 # Step: check device activation
9748 self.lu.LogStep(1, steps_total, "Check device existence")
9749 self._CheckDisksExistence([self.other_node, self.target_node])
9750 self._CheckVolumeGroup([self.target_node, self.other_node])
9752 # Step: check other node consistency
9753 self.lu.LogStep(2, steps_total, "Check peer consistency")
9754 self._CheckDisksConsistency(self.other_node,
9755 self.other_node == self.instance.primary_node,
9758 # Step: create new storage
9759 self.lu.LogStep(3, steps_total, "Allocate new storage")
9760 iv_names = self._CreateNewStorage(self.target_node)
9762 # Step: for each lv, detach+rename*2+attach
9763 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9764 for dev, old_lvs, new_lvs in iv_names.itervalues():
9765 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9767 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9769 result.Raise("Can't detach drbd from local storage on node"
9770 " %s for device %s" % (self.target_node, dev.iv_name))
9772 #cfg.Update(instance)
9774 # ok, we created the new LVs, so now we know we have the needed
9775 # storage; as such, we proceed on the target node to rename
9776 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9777 # using the assumption that logical_id == physical_id (which in
9778 # turn is the unique_id on that node)
9780 # FIXME(iustin): use a better name for the replaced LVs
9781 temp_suffix = int(time.time())
9782 ren_fn = lambda d, suff: (d.physical_id[0],
9783 d.physical_id[1] + "_replaced-%s" % suff)
9785 # Build the rename list based on what LVs exist on the node
9786 rename_old_to_new = []
9787 for to_ren in old_lvs:
9788 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9789 if not result.fail_msg and result.payload:
9791 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9793 self.lu.LogInfo("Renaming the old LVs on the target node")
9794 result = self.rpc.call_blockdev_rename(self.target_node,
9796 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9798 # Now we rename the new LVs to the old LVs
9799 self.lu.LogInfo("Renaming the new LVs on the target node")
9800 rename_new_to_old = [(new, old.physical_id)
9801 for old, new in zip(old_lvs, new_lvs)]
9802 result = self.rpc.call_blockdev_rename(self.target_node,
9804 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9806 # Intermediate steps of in memory modifications
9807 for old, new in zip(old_lvs, new_lvs):
9808 new.logical_id = old.logical_id
9809 self.cfg.SetDiskID(new, self.target_node)
9811 # We need to modify old_lvs so that removal later removes the
9812 # right LVs, not the newly added ones; note that old_lvs is a
9814 for disk in old_lvs:
9815 disk.logical_id = ren_fn(disk, temp_suffix)
9816 self.cfg.SetDiskID(disk, self.target_node)
9818 # Now that the new lvs have the old name, we can add them to the device
9819 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9820 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9821 new_lvs)
9822 msg = result.fail_msg
9823 if msg:
9824 for new_lv in new_lvs:
9825 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9826 new_lv).fail_msg
9827 if msg2:
9828 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9829 hint=("cleanup manually the unused logical"
9830 " volumes"))
9831 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9833 cstep = 5
9834 if self.early_release:
9835 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9836 cstep += 1
9837 self._RemoveOldStorage(self.target_node, iv_names)
9838 # WARNING: we release both node locks here, do not do other RPCs
9839 # than WaitForSync to the primary node
9840 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9841 names=[self.target_node, self.other_node])
9844 # This can fail as the old devices are degraded and _WaitForSync
9845 # does a combined result over all disks, so we don't check its return value
9846 self.lu.LogStep(cstep, steps_total, "Sync devices")
9847 cstep += 1
9848 _WaitForSync(self.lu, self.instance)
9850 # Check all devices manually
9851 self._CheckDevices(self.instance.primary_node, iv_names)
9853 # Step: remove old storage
9854 if not self.early_release:
9855 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9856 cstep += 1
9857 self._RemoveOldStorage(self.target_node, iv_names)
9859 def _ExecDrbd8Secondary(self, feedback_fn):
9860 """Replace the secondary node for DRBD 8.
9862 The algorithm for replace is quite complicated:
9863 - for all disks of the instance:
9864 - create new LVs on the new node with same names
9865 - shutdown the drbd device on the old secondary
9866 - disconnect the drbd network on the primary
9867 - create the drbd device on the new secondary
9868 - network attach the drbd on the primary, using an artifice:
9869 the drbd code for Attach() will connect to the network if it
9870 finds a device which is connected to the good local disks but
9871 not network enabled
9872 - wait for sync across all devices
9873 - remove all disks from the old secondary
9875 Failures are not very well handled.
9877 """
9878 steps_total = 6
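# Condensed sketch of the RPC sequence used below (illustration only; the
# arguments are simplified stand-ins for the real ones built in this method):
#
#   rpc.call_blockdev_shutdown(old_secondary, drbd_dev)             # stop DRBD on old secondary
#   rpc.call_drbd_disconnect_net([primary], secondary_ips, disks)   # primary goes standalone
#   cfg.Update(instance, feedback_fn)                               # disks now point to new node
#   rpc.call_drbd_attach_net([primary, new_node], secondary_ips,
#                            disks, instance_name, False)           # reconnect to new secondary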
9880 pnode = self.instance.primary_node
9882 # Step: check device activation
9883 self.lu.LogStep(1, steps_total, "Check device existence")
9884 self._CheckDisksExistence([self.instance.primary_node])
9885 self._CheckVolumeGroup([self.instance.primary_node])
9887 # Step: check other node consistency
9888 self.lu.LogStep(2, steps_total, "Check peer consistency")
9889 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9891 # Step: create new storage
9892 self.lu.LogStep(3, steps_total, "Allocate new storage")
9893 for idx, dev in enumerate(self.instance.disks):
9894 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9895 (self.new_node, idx))
9896 # we pass force_create=True to force LVM creation
9897 for new_lv in dev.children:
9898 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9899 _GetInstanceInfoText(self.instance), False)
9901 # Step 4: drbd minors and drbd setup changes
9902 # after this, we must manually remove the drbd minors on both the
9903 # error and the success paths
9904 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9905 minors = self.cfg.AllocateDRBDMinor([self.new_node
9906 for dev in self.instance.disks],
9907 self.instance.name)
9908 logging.debug("Allocated minors %r", minors)
9910 iv_names = {}
9911 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9912 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9913 (self.new_node, idx))
9914 # create new devices on new_node; note that we create two IDs:
9915 # one without port, so the drbd will be activated without
9916 # networking information on the new node at this stage, and one
9917 # with network, for the latter activation in step 4
9918 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9919 if self.instance.primary_node == o_node1:
9920 p_minor = o_minor1
9921 else:
9922 assert self.instance.primary_node == o_node2, "Three-node instance?"
9923 p_minor = o_minor2
9925 new_alone_id = (self.instance.primary_node, self.new_node, None,
9926 p_minor, new_minor, o_secret)
9927 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9928 p_minor, new_minor, o_secret)
9930 iv_names[idx] = (dev, dev.children, new_net_id)
9931 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9933 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9934 logical_id=new_alone_id,
9935 children=dev.children,
9938 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9939 _GetInstanceInfoText(self.instance), False)
9940 except errors.GenericError:
9941 self.cfg.ReleaseDRBDMinors(self.instance.name)
9942 raise
9944 # We have new devices, shutdown the drbd on the old secondary
9945 for idx, dev in enumerate(self.instance.disks):
9946 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9947 self.cfg.SetDiskID(dev, self.target_node)
9948 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9950 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9951 "node: %s" % (idx, msg),
9952 hint=("Please cleanup this device manually as"
9953 " soon as possible"))
9955 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9956 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
9957 self.instance.disks)[pnode]
9959 msg = result.fail_msg
9961 # detaches didn't succeed (unlikely)
9962 self.cfg.ReleaseDRBDMinors(self.instance.name)
9963 raise errors.OpExecError("Can't detach the disks from the network on"
9964 " old node: %s" % (msg,))
9966 # if we managed to detach at least one, we update all the disks of
9967 # the instance to point to the new secondary
9968 self.lu.LogInfo("Updating instance configuration")
9969 for dev, _, new_logical_id in iv_names.itervalues():
9970 dev.logical_id = new_logical_id
9971 self.cfg.SetDiskID(dev, self.instance.primary_node)
9973 self.cfg.Update(self.instance, feedback_fn)
9975 # and now perform the drbd attach
9976 self.lu.LogInfo("Attaching primary drbds to new secondary"
9977 " (standalone => connected)")
9978 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9979 self.new_node],
9980 self.node_secondary_ip,
9981 self.instance.disks,
9982 self.instance.name,
9983 False)
9984 for to_node, to_result in result.items():
9985 msg = to_result.fail_msg
9987 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9989 hint=("please do a gnt-instance info to see the"
9990 " status of disks"))
9991 cstep = 5
9992 if self.early_release:
9993 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9994 cstep += 1
9995 self._RemoveOldStorage(self.target_node, iv_names)
9996 # WARNING: we release all node locks here, do not do other RPCs
9997 # than WaitForSync to the primary node
9998 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9999 names=[self.instance.primary_node,
10000 self.target_node,
10001 self.new_node])
10004 # This can fail as the old devices are degraded and _WaitForSync
10005 # does a combined result over all disks, so we don't check its return value
10006 self.lu.LogStep(cstep, steps_total, "Sync devices")
10007 cstep += 1
10008 _WaitForSync(self.lu, self.instance)
10010 # Check all devices manually
10011 self._CheckDevices(self.instance.primary_node, iv_names)
10013 # Step: remove old storage
10014 if not self.early_release:
10015 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10016 self._RemoveOldStorage(self.target_node, iv_names)
10019 class LURepairNodeStorage(NoHooksLU):
10020 """Repairs the volume group on a node.
10025 def CheckArguments(self):
10026 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10028 storage_type = self.op.storage_type
10030 if (constants.SO_FIX_CONSISTENCY not in
10031 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10032 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10033 " repaired" % storage_type,
10034 errors.ECODE_INVAL)
10036 def ExpandNames(self):
10037 self.needed_locks = {
10038 locking.LEVEL_NODE: [self.op.node_name],
10041 def _CheckFaultyDisks(self, instance, node_name):
10042 """Ensure faulty disks abort the opcode or at least warn."""
10043 try:
10044 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10045 node_name, True):
10046 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10047 " node '%s'" % (instance.name, node_name),
10048 errors.ECODE_STATE)
10049 except errors.OpPrereqError, err:
10050 if self.op.ignore_consistency:
10051 self.proc.LogWarning(str(err.args[0]))
10052 else:
10053 raise
10055 def CheckPrereq(self):
10056 """Check prerequisites.
10059 # Check whether any instance on this node has faulty disks
10060 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10061 if not inst.admin_up:
10062 continue
10063 check_nodes = set(inst.all_nodes)
10064 check_nodes.discard(self.op.node_name)
10065 for inst_node_name in check_nodes:
10066 self._CheckFaultyDisks(inst, inst_node_name)
10068 def Exec(self, feedback_fn):
10069 feedback_fn("Repairing storage unit '%s' on %s ..." %
10070 (self.op.name, self.op.node_name))
10072 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10073 result = self.rpc.call_storage_execute(self.op.node_name,
10074 self.op.storage_type, st_args,
10075 self.op.name,
10076 constants.SO_FIX_CONSISTENCY)
10077 result.Raise("Failed to repair storage unit '%s' on %s" %
10078 (self.op.name, self.op.node_name))
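# A minimal sketch of the opcode this LU processes (field values are
# made-up examples; the attributes mirror the self.op usage above and the
# opcode name follows the LU name as elsewhere in this module):
#
#   op = opcodes.OpRepairNodeStorage(node_name="node2.example.com",
#                                    storage_type=constants.ST_LVM_VG,
#                                    name="xenvg",
#                                    ignore_consistency=False)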
10081 class LUNodeEvacuate(NoHooksLU):
10082 """Evacuates instances off a list of nodes.
10087 def CheckArguments(self):
10088 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10090 def ExpandNames(self):
10091 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10093 if self.op.remote_node is not None:
10094 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10095 assert self.op.remote_node
10097 if self.op.remote_node == self.op.node_name:
10098 raise errors.OpPrereqError("Can not use evacuated node as a new"
10099 " secondary node", errors.ECODE_INVAL)
10101 if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10102 raise errors.OpPrereqError("Without the use of an iallocator only"
10103 " secondary instances can be evacuated",
10104 errors.ECODE_INVAL)
10107 self.share_locks = _ShareAll()
10108 self.needed_locks = {
10109 locking.LEVEL_INSTANCE: [],
10110 locking.LEVEL_NODEGROUP: [],
10111 locking.LEVEL_NODE: [],
10114 if self.op.remote_node is None:
10115 # Iallocator will choose any node(s) in the same group
10116 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10118 group_nodes = frozenset([self.op.remote_node])
10120 # Determine nodes to be locked
10121 self.lock_nodes = set([self.op.node_name]) | group_nodes
10123 def _DetermineInstances(self):
10124 """Builds list of instances to operate on.
10127 assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10129 if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10130 # Primary instances only
10131 inst_fn = _GetNodePrimaryInstances
10132 assert self.op.remote_node is None, \
10133 "Evacuating primary instances requires iallocator"
10134 elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10135 # Secondary instances only
10136 inst_fn = _GetNodeSecondaryInstances
10139 assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10140 inst_fn = _GetNodeInstances
10142 return inst_fn(self.cfg, self.op.node_name)
10144 def DeclareLocks(self, level):
10145 if level == locking.LEVEL_INSTANCE:
10146 # Lock instances optimistically, needs verification once node and group
10147 # locks have been acquired
10148 self.needed_locks[locking.LEVEL_INSTANCE] = \
10149 set(i.name for i in self._DetermineInstances())
10151 elif level == locking.LEVEL_NODEGROUP:
10152 # Lock node groups optimistically, needs verification once nodes have
10154 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10155 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10157 elif level == locking.LEVEL_NODE:
10158 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10160 def CheckPrereq(self):
10162 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10163 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10164 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10166 assert owned_nodes == self.lock_nodes
10168 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10169 if owned_groups != wanted_groups:
10170 raise errors.OpExecError("Node groups changed since locks were acquired,"
10171 " current groups are '%s', used to be '%s'" %
10172 (utils.CommaJoin(wanted_groups),
10173 utils.CommaJoin(owned_groups)))
10175 # Determine affected instances
10176 self.instances = self._DetermineInstances()
10177 self.instance_names = [i.name for i in self.instances]
10179 if set(self.instance_names) != owned_instances:
10180 raise errors.OpExecError("Instances on node '%s' changed since locks"
10181 " were acquired, current instances are '%s',"
10182 " used to be '%s'" %
10183 (self.op.node_name,
10184 utils.CommaJoin(self.instance_names),
10185 utils.CommaJoin(owned_instances)))
10187 if self.instance_names:
10188 self.LogInfo("Evacuating instances from node '%s': %s",
10190 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10192 self.LogInfo("No instances to evacuate from node '%s'",
10195 if self.op.remote_node is not None:
10196 for i in self.instances:
10197 if i.primary_node == self.op.remote_node:
10198 raise errors.OpPrereqError("Node %s is the primary node of"
10199 " instance %s, cannot use it as"
10201 (self.op.remote_node, i.name),
10202 errors.ECODE_INVAL)
10204 def Exec(self, feedback_fn):
10205 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10207 if not self.instance_names:
10208 # No instances to evacuate
10209 jobs = []
10211 elif self.op.iallocator is not None:
10212 # TODO: Implement relocation to other group
10213 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10214 evac_mode=self.op.mode,
10215 instances=list(self.instance_names))
10217 ial.Run(self.op.iallocator)
10219 if not ial.success:
10220 raise errors.OpPrereqError("Can't compute node evacuation using"
10221 " iallocator '%s': %s" %
10222 (self.op.iallocator, ial.info),
10223 errors.ECODE_NORES)
10225 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10227 elif self.op.remote_node is not None:
10228 assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10229 jobs = [
10230 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10231 remote_node=self.op.remote_node,
10232 disks=[],
10233 mode=constants.REPLACE_DISK_CHG,
10234 early_release=self.op.early_release)]
10235 for instance_name in self.instance_names
10236 ]
10238 else:
10239 raise errors.ProgrammerError("No iallocator or remote node")
10241 return ResultWithJobs(jobs)
10244 def _SetOpEarlyRelease(early_release, op):
10245 """Sets C{early_release} flag on opcodes if available.
10247 """
10248 try:
10249 op.early_release = early_release
10250 except AttributeError:
10251 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10253 return op
10256 def _NodeEvacDest(use_nodes, group, nodes):
10257 """Returns group or nodes depending on caller's choice.
10261 return utils.CommaJoin(nodes)
10266 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10267 """Unpacks the result of change-group and node-evacuate iallocator requests.
10269 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10270 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10272 @type lu: L{LogicalUnit}
10273 @param lu: Logical unit instance
10274 @type alloc_result: tuple/list
10275 @param alloc_result: Result from iallocator
10276 @type early_release: bool
10277 @param early_release: Whether to release locks early if possible
10278 @type use_nodes: bool
10279 @param use_nodes: Whether to display node names instead of groups
10282 (moved, failed, jobs) = alloc_result
10285 lu.LogWarning("Unable to evacuate instances %s",
10286 utils.CommaJoin("%s (%s)" % (name, reason)
10287 for (name, reason) in failed))
10290 lu.LogInfo("Instances to be moved: %s",
10291 utils.CommaJoin("%s (to %s)" %
10292 (name, _NodeEvacDest(use_nodes, group, nodes))
10293 for (name, group, nodes) in moved))
10295 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10296 map(opcodes.OpCode.LoadOpCode, ops))
10297 for ops in jobs]
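# Sketch of the iallocator result unpacked above (shapes only; the values
# are invented for illustration):
#
#   alloc_result = (
#     [("inst1", "group1", ["nodeA", "nodeB"])],   # moved: instance, target group, nodes
#     [("inst2", "instance has faulty disks")],    # failed: instance, reason
#     [[op1_dict, op2_dict], [op3_dict]],          # jobs: serialized opcodes, one list per job
#   )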
10300 class LUInstanceGrowDisk(LogicalUnit):
10301 """Grow a disk of an instance.
10304 HPATH = "disk-grow"
10305 HTYPE = constants.HTYPE_INSTANCE
10308 def ExpandNames(self):
10309 self._ExpandAndLockInstance()
10310 self.needed_locks[locking.LEVEL_NODE] = []
10311 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10313 def DeclareLocks(self, level):
10314 if level == locking.LEVEL_NODE:
10315 self._LockInstancesNodes()
10317 def BuildHooksEnv(self):
10318 """Build hooks env.
10320 This runs on the master, the primary and all the secondaries.
10324 "DISK": self.op.disk,
10325 "AMOUNT": self.op.amount,
10327 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10330 def BuildHooksNodes(self):
10331 """Build hooks nodes.
10334 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10337 def CheckPrereq(self):
10338 """Check prerequisites.
10340 This checks that the instance is in the cluster.
10343 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10344 assert instance is not None, \
10345 "Cannot retrieve locked instance %s" % self.op.instance_name
10346 nodenames = list(instance.all_nodes)
10347 for node in nodenames:
10348 _CheckNodeOnline(self, node)
10350 self.instance = instance
10352 if instance.disk_template not in constants.DTS_GROWABLE:
10353 raise errors.OpPrereqError("Instance's disk layout does not support"
10354 " growing", errors.ECODE_INVAL)
10356 self.disk = instance.FindDisk(self.op.disk)
10358 if instance.disk_template not in (constants.DT_FILE,
10359 constants.DT_SHARED_FILE):
10360 # TODO: check the free disk space for file, when that feature will be
10362 _CheckNodesFreeDiskPerVG(self, nodenames,
10363 self.disk.ComputeGrowth(self.op.amount))
10365 def Exec(self, feedback_fn):
10366 """Execute disk grow.
10369 instance = self.instance
10372 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10374 raise errors.OpExecError("Cannot activate block device to grow")
10376 # First run all grow ops in dry-run mode
10377 for node in instance.all_nodes:
10378 self.cfg.SetDiskID(disk, node)
10379 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10380 result.Raise("Grow request failed to node %s" % node)
10382 # We know that (as far as we can test) operations across different
10383 # nodes will succeed, time to run it for real
10384 for node in instance.all_nodes:
10385 self.cfg.SetDiskID(disk, node)
10386 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10387 result.Raise("Grow request failed to node %s" % node)
10389 # TODO: Rewrite code to work properly
10390 # DRBD goes into sync mode for a short amount of time after executing the
10391 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10392 # calling "resize" in sync mode fails. Sleeping for a short amount of
10393 # time is a work-around.
10394 time.sleep(5)
10396 disk.RecordGrow(self.op.amount)
10397 self.cfg.Update(instance, feedback_fn)
10398 if self.op.wait_for_sync:
10399 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10401 self.proc.LogWarning("Disk sync-ing has not returned a good"
10402 " status; please check the instance")
10403 if not instance.admin_up:
10404 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10405 elif not instance.admin_up:
10406 self.proc.LogWarning("Not shutting down the disk even if the instance is"
10407 " not supposed to be running because no wait for"
10408 " sync mode was requested")
10411 class LUInstanceQueryData(NoHooksLU):
10412 """Query runtime instance data.
10417 def ExpandNames(self):
10418 self.needed_locks = {}
10420 # Use locking if requested or when non-static information is wanted
10421 if not (self.op.static or self.op.use_locking):
10422 self.LogWarning("Non-static data requested, locks need to be acquired")
10423 self.op.use_locking = True
10425 if self.op.instances or not self.op.use_locking:
10426 # Expand instance names right here
10427 self.wanted_names = _GetWantedInstances(self, self.op.instances)
10429 # Will use acquired locks
10430 self.wanted_names = None
10432 if self.op.use_locking:
10433 self.share_locks = _ShareAll()
10435 if self.wanted_names is None:
10436 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10438 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10440 self.needed_locks[locking.LEVEL_NODE] = []
10441 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10443 def DeclareLocks(self, level):
10444 if self.op.use_locking and level == locking.LEVEL_NODE:
10445 self._LockInstancesNodes()
10447 def CheckPrereq(self):
10448 """Check prerequisites.
10450 This only checks the optional instance list against the existing names.
10453 if self.wanted_names is None:
10454 assert self.op.use_locking, "Locking was not used"
10455 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10457 self.wanted_instances = \
10458 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10460 def _ComputeBlockdevStatus(self, node, instance_name, dev):
10461 """Returns the status of a block device
10464 if self.op.static or not node:
10467 self.cfg.SetDiskID(dev, node)
10469 result = self.rpc.call_blockdev_find(node, dev)
10473 result.Raise("Can't compute disk status for %s" % instance_name)
10475 status = result.payload
10479 return (status.dev_path, status.major, status.minor,
10480 status.sync_percent, status.estimated_time,
10481 status.is_degraded, status.ldisk_status)
10483 def _ComputeDiskStatus(self, instance, snode, dev):
10484 """Compute block device status.
10487 if dev.dev_type in constants.LDS_DRBD:
10488 # we change the snode then (otherwise we use the one passed in)
10489 if dev.logical_id[0] == instance.primary_node:
10490 snode = dev.logical_id[1]
10492 snode = dev.logical_id[0]
10494 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10495 instance.name, dev)
10496 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10499 dev_children = map(compat.partial(self._ComputeDiskStatus,
10506 "iv_name": dev.iv_name,
10507 "dev_type": dev.dev_type,
10508 "logical_id": dev.logical_id,
10509 "physical_id": dev.physical_id,
10510 "pstatus": dev_pstatus,
10511 "sstatus": dev_sstatus,
10512 "children": dev_children,
10517 def Exec(self, feedback_fn):
10518 """Gather and return data"""
10521 cluster = self.cfg.GetClusterInfo()
10523 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10524 for i in self.wanted_instances)
10525 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10526 if self.op.static or pnode.offline:
10527 remote_state = None
10529 self.LogWarning("Primary node %s is marked offline, returning static"
10530 " information only for instance %s" %
10531 (pnode.name, instance.name))
10533 remote_info = self.rpc.call_instance_info(instance.primary_node,
10535 instance.hypervisor)
10536 remote_info.Raise("Error checking node %s" % instance.primary_node)
10537 remote_info = remote_info.payload
10538 if remote_info and "state" in remote_info:
10539 remote_state = "up"
10541 remote_state = "down"
10543 if instance.admin_up:
10544 config_state = "up"
10546 config_state = "down"
10548 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10551 result[instance.name] = {
10552 "name": instance.name,
10553 "config_state": config_state,
10554 "run_state": remote_state,
10555 "pnode": instance.primary_node,
10556 "snodes": instance.secondary_nodes,
10558 # this happens to be the same format used for hooks
10559 "nics": _NICListToTuple(self, instance.nics),
10560 "disk_template": instance.disk_template,
10562 "hypervisor": instance.hypervisor,
10563 "network_port": instance.network_port,
10564 "hv_instance": instance.hvparams,
10565 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10566 "be_instance": instance.beparams,
10567 "be_actual": cluster.FillBE(instance),
10568 "os_instance": instance.osparams,
10569 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10570 "serial_no": instance.serial_no,
10571 "mtime": instance.mtime,
10572 "ctime": instance.ctime,
10573 "uuid": instance.uuid,
10579 class LUInstanceSetParams(LogicalUnit):
10580 """Modifies an instances's parameters.
10583 HPATH = "instance-modify"
10584 HTYPE = constants.HTYPE_INSTANCE
10587 def CheckArguments(self):
10588 if not (self.op.nics or self.op.disks or self.op.disk_template or
10589 self.op.hvparams or self.op.beparams or self.op.os_name):
10590 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10592 if self.op.hvparams:
10593 _CheckGlobalHvParams(self.op.hvparams)
10596 disk_addremove = 0
10597 for disk_op, disk_dict in self.op.disks:
10598 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10599 if disk_op == constants.DDM_REMOVE:
10600 disk_addremove += 1
10602 elif disk_op == constants.DDM_ADD:
10603 disk_addremove += 1
10605 if not isinstance(disk_op, int):
10606 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10607 if not isinstance(disk_dict, dict):
10608 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10609 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10611 if disk_op == constants.DDM_ADD:
10612 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10613 if mode not in constants.DISK_ACCESS_SET:
10614 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10615 errors.ECODE_INVAL)
10616 size = disk_dict.get(constants.IDISK_SIZE, None)
10618 raise errors.OpPrereqError("Required disk parameter size missing",
10619 errors.ECODE_INVAL)
10622 except (TypeError, ValueError), err:
10623 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10624 str(err), errors.ECODE_INVAL)
10625 disk_dict[constants.IDISK_SIZE] = size
10627 # modification of disk
10628 if constants.IDISK_SIZE in disk_dict:
10629 raise errors.OpPrereqError("Disk size change not possible, use"
10630 " grow-disk", errors.ECODE_INVAL)
10632 if disk_addremove > 1:
10633 raise errors.OpPrereqError("Only one disk add or remove operation"
10634 " supported at a time", errors.ECODE_INVAL)
10636 if self.op.disks and self.op.disk_template is not None:
10637 raise errors.OpPrereqError("Disk template conversion and other disk"
10638 " changes not supported at the same time",
10639 errors.ECODE_INVAL)
10641 if (self.op.disk_template and
10642 self.op.disk_template in constants.DTS_INT_MIRROR and
10643 self.op.remote_node is None):
10644 raise errors.OpPrereqError("Changing the disk template to a mirrored"
10645 " one requires specifying a secondary node",
10646 errors.ECODE_INVAL)
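# Examples of the "disks" argument accepted by the checks above (a sketch;
# sizes and modes are arbitrary):
#
#   disks=[(constants.DDM_ADD, {constants.IDISK_SIZE: 1024,
#                               constants.IDISK_MODE: constants.DISK_RDWR})]
#   disks=[(constants.DDM_REMOVE, {})]
#   disks=[(0, {constants.IDISK_MODE: constants.DISK_RDONLY})]  # modify disk 0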
10649 nic_addremove = 0
10650 for nic_op, nic_dict in self.op.nics:
10651 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10652 if nic_op == constants.DDM_REMOVE:
10655 elif nic_op == constants.DDM_ADD:
10658 if not isinstance(nic_op, int):
10659 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10660 if not isinstance(nic_dict, dict):
10661 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10662 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10664 # nic_dict should be a dict
10665 nic_ip = nic_dict.get(constants.INIC_IP, None)
10666 if nic_ip is not None:
10667 if nic_ip.lower() == constants.VALUE_NONE:
10668 nic_dict[constants.INIC_IP] = None
10670 if not netutils.IPAddress.IsValid(nic_ip):
10671 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10672 errors.ECODE_INVAL)
10674 nic_bridge = nic_dict.get("bridge", None)
10675 nic_link = nic_dict.get(constants.INIC_LINK, None)
10676 if nic_bridge and nic_link:
10677 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10678 " at the same time", errors.ECODE_INVAL)
10679 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10680 nic_dict["bridge"] = None
10681 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10682 nic_dict[constants.INIC_LINK] = None
10684 if nic_op == constants.DDM_ADD:
10685 nic_mac = nic_dict.get(constants.INIC_MAC, None)
10686 if nic_mac is None:
10687 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10689 if constants.INIC_MAC in nic_dict:
10690 nic_mac = nic_dict[constants.INIC_MAC]
10691 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10692 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10694 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10695 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10696 " modifying an existing nic",
10697 errors.ECODE_INVAL)
10699 if nic_addremove > 1:
10700 raise errors.OpPrereqError("Only one NIC add or remove operation"
10701 " supported at a time", errors.ECODE_INVAL)
10703 def ExpandNames(self):
10704 self._ExpandAndLockInstance()
10705 self.needed_locks[locking.LEVEL_NODE] = []
10706 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10708 def DeclareLocks(self, level):
10709 if level == locking.LEVEL_NODE:
10710 self._LockInstancesNodes()
10711 if self.op.disk_template and self.op.remote_node:
10712 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10713 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10715 def BuildHooksEnv(self):
10716 """Build hooks env.
10718 This runs on the master, primary and secondaries.
10722 if constants.BE_MEMORY in self.be_new:
10723 args["memory"] = self.be_new[constants.BE_MEMORY]
10724 if constants.BE_VCPUS in self.be_new:
10725 args["vcpus"] = self.be_new[constants.BE_VCPUS]
10726 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10727 # information at all.
10730 nic_override = dict(self.op.nics)
10731 for idx, nic in enumerate(self.instance.nics):
10732 if idx in nic_override:
10733 this_nic_override = nic_override[idx]
10735 this_nic_override = {}
10736 if constants.INIC_IP in this_nic_override:
10737 ip = this_nic_override[constants.INIC_IP]
10740 if constants.INIC_MAC in this_nic_override:
10741 mac = this_nic_override[constants.INIC_MAC]
10744 if idx in self.nic_pnew:
10745 nicparams = self.nic_pnew[idx]
10747 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10748 mode = nicparams[constants.NIC_MODE]
10749 link = nicparams[constants.NIC_LINK]
10750 args["nics"].append((ip, mac, mode, link))
10751 if constants.DDM_ADD in nic_override:
10752 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10753 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10754 nicparams = self.nic_pnew[constants.DDM_ADD]
10755 mode = nicparams[constants.NIC_MODE]
10756 link = nicparams[constants.NIC_LINK]
10757 args["nics"].append((ip, mac, mode, link))
10758 elif constants.DDM_REMOVE in nic_override:
10759 del args["nics"][-1]
10761 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10762 if self.op.disk_template:
10763 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10767 def BuildHooksNodes(self):
10768 """Build hooks nodes.
10771 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10774 def CheckPrereq(self):
10775 """Check prerequisites.
10777 This only checks the instance list against the existing names.
10780 # checking the new params on the primary/secondary nodes
10782 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10783 cluster = self.cluster = self.cfg.GetClusterInfo()
10784 assert self.instance is not None, \
10785 "Cannot retrieve locked instance %s" % self.op.instance_name
10786 pnode = instance.primary_node
10787 nodelist = list(instance.all_nodes)
10790 if self.op.os_name and not self.op.force:
10791 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10792 self.op.force_variant)
10793 instance_os = self.op.os_name
10795 instance_os = instance.os
10797 if self.op.disk_template:
10798 if instance.disk_template == self.op.disk_template:
10799 raise errors.OpPrereqError("Instance already has disk template %s" %
10800 instance.disk_template, errors.ECODE_INVAL)
10802 if (instance.disk_template,
10803 self.op.disk_template) not in self._DISK_CONVERSIONS:
10804 raise errors.OpPrereqError("Unsupported disk template conversion from"
10805 " %s to %s" % (instance.disk_template,
10806 self.op.disk_template),
10807 errors.ECODE_INVAL)
10808 _CheckInstanceDown(self, instance, "cannot change disk template")
10809 if self.op.disk_template in constants.DTS_INT_MIRROR:
10810 if self.op.remote_node == pnode:
10811 raise errors.OpPrereqError("Given new secondary node %s is the same"
10812 " as the primary node of the instance" %
10813 self.op.remote_node, errors.ECODE_STATE)
10814 _CheckNodeOnline(self, self.op.remote_node)
10815 _CheckNodeNotDrained(self, self.op.remote_node)
10816 # FIXME: here we assume that the old disk template is DT_PLAIN
10817 assert instance.disk_template == constants.DT_PLAIN
10818 disks = [{constants.IDISK_SIZE: d.size,
10819 constants.IDISK_VG: d.logical_id[0]}
10820 for d in instance.disks]
10821 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10822 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10824 # hvparams processing
10825 if self.op.hvparams:
10826 hv_type = instance.hypervisor
10827 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10828 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10829 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10832 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10833 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10834 self.hv_new = hv_new # the new actual values
10835 self.hv_inst = i_hvdict # the new dict (without defaults)
10837 self.hv_new = self.hv_inst = {}
10839 # beparams processing
10840 if self.op.beparams:
10841 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10843 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10844 be_new = cluster.SimpleFillBE(i_bedict)
10845 self.be_new = be_new # the new actual values
10846 self.be_inst = i_bedict # the new dict (without defaults)
10848 self.be_new = self.be_inst = {}
10849 be_old = cluster.FillBE(instance)
10851 # osparams processing
10852 if self.op.osparams:
10853 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10854 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10855 self.os_inst = i_osdict # the new dict (without defaults)
10861 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10862 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10863 mem_check_list = [pnode]
10864 if be_new[constants.BE_AUTO_BALANCE]:
10865 # either we changed auto_balance to yes or it was from before
10866 mem_check_list.extend(instance.secondary_nodes)
10867 instance_info = self.rpc.call_instance_info(pnode, instance.name,
10868 instance.hypervisor)
10869 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10870 instance.hypervisor)
10871 pninfo = nodeinfo[pnode]
10872 msg = pninfo.fail_msg
10874 # Assume the primary node is unreachable and go ahead
10875 self.warn.append("Can't get info from primary node %s: %s" %
10877 elif not isinstance(pninfo.payload.get("memory_free", None), int):
10878 self.warn.append("Node data from primary node %s doesn't contain"
10879 " free memory information" % pnode)
10880 elif instance_info.fail_msg:
10881 self.warn.append("Can't get instance runtime information: %s" %
10882 instance_info.fail_msg)
10884 if instance_info.payload:
10885 current_mem = int(instance_info.payload["memory"])
10887 # Assume instance not running
10888 # (there is a slight race condition here, but it's not very probable,
10889 # and we have no other way to check)
10891 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10892 pninfo.payload["memory_free"])
10894 raise errors.OpPrereqError("This change will prevent the instance"
10895 " from starting, due to %d MB of memory"
10896 " missing on its primary node" % miss_mem,
10897 errors.ECODE_NORES)
10899 if be_new[constants.BE_AUTO_BALANCE]:
10900 for node, nres in nodeinfo.items():
10901 if node not in instance.secondary_nodes:
10903 nres.Raise("Can't get info from secondary node %s" % node,
10904 prereq=True, ecode=errors.ECODE_STATE)
10905 if not isinstance(nres.payload.get("memory_free", None), int):
10906 raise errors.OpPrereqError("Secondary node %s didn't return free"
10907 " memory information" % node,
10908 errors.ECODE_STATE)
10909 elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10910 raise errors.OpPrereqError("This change will prevent the instance"
10911 " from failover to its secondary node"
10912 " %s, due to not enough memory" % node,
10913 errors.ECODE_STATE)
10917 self.nic_pinst = {}
10918 for nic_op, nic_dict in self.op.nics:
10919 if nic_op == constants.DDM_REMOVE:
10920 if not instance.nics:
10921 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10922 errors.ECODE_INVAL)
10924 if nic_op != constants.DDM_ADD:
10926 if not instance.nics:
10927 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10928 " no NICs" % nic_op,
10929 errors.ECODE_INVAL)
10930 if nic_op < 0 or nic_op >= len(instance.nics):
10931 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10933 (nic_op, len(instance.nics) - 1),
10934 errors.ECODE_INVAL)
10935 old_nic_params = instance.nics[nic_op].nicparams
10936 old_nic_ip = instance.nics[nic_op].ip
10938 old_nic_params = {}
10941 update_params_dict = dict([(key, nic_dict[key])
10942 for key in constants.NICS_PARAMETERS
10943 if key in nic_dict])
10945 if "bridge" in nic_dict:
10946 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10948 new_nic_params = _GetUpdatedParams(old_nic_params,
10949 update_params_dict)
10950 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10951 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10952 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10953 self.nic_pinst[nic_op] = new_nic_params
10954 self.nic_pnew[nic_op] = new_filled_nic_params
10955 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10957 if new_nic_mode == constants.NIC_MODE_BRIDGED:
10958 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10959 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10961 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10963 self.warn.append(msg)
10965 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10966 if new_nic_mode == constants.NIC_MODE_ROUTED:
10967 if constants.INIC_IP in nic_dict:
10968 nic_ip = nic_dict[constants.INIC_IP]
10970 nic_ip = old_nic_ip
10972 raise errors.OpPrereqError("Cannot set the nic ip to None"
10973 " on a routed nic", errors.ECODE_INVAL)
10974 if constants.INIC_MAC in nic_dict:
10975 nic_mac = nic_dict[constants.INIC_MAC]
10976 if nic_mac is None:
10977 raise errors.OpPrereqError("Cannot set the nic mac to None",
10978 errors.ECODE_INVAL)
10979 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10980 # otherwise generate the mac
10981 nic_dict[constants.INIC_MAC] = \
10982 self.cfg.GenerateMAC(self.proc.GetECId())
10984 # or validate/reserve the current one
10986 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10987 except errors.ReservationError:
10988 raise errors.OpPrereqError("MAC address %s already in use"
10989 " in cluster" % nic_mac,
10990 errors.ECODE_NOTUNIQUE)
10993 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10994 raise errors.OpPrereqError("Disk operations not supported for"
10995 " diskless instances",
10996 errors.ECODE_INVAL)
10997 for disk_op, _ in self.op.disks:
10998 if disk_op == constants.DDM_REMOVE:
10999 if len(instance.disks) == 1:
11000 raise errors.OpPrereqError("Cannot remove the last disk of"
11001 " an instance", errors.ECODE_INVAL)
11002 _CheckInstanceDown(self, instance, "cannot remove disks")
11004 if (disk_op == constants.DDM_ADD and
11005 len(instance.disks) >= constants.MAX_DISKS):
11006 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11007 " add more" % constants.MAX_DISKS,
11008 errors.ECODE_STATE)
11009 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11011 if disk_op < 0 or disk_op >= len(instance.disks):
11012 raise errors.OpPrereqError("Invalid disk index %s, valid values"
11014 (disk_op, len(instance.disks)),
11015 errors.ECODE_INVAL)
11019 def _ConvertPlainToDrbd(self, feedback_fn):
11020 """Converts an instance from plain to drbd.
11023 feedback_fn("Converting template to drbd")
11024 instance = self.instance
11025 pnode = instance.primary_node
11026 snode = self.op.remote_node
11028 # create a fake disk info for _GenerateDiskTemplate
11029 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11030 constants.IDISK_VG: d.logical_id[0]}
11031 for d in instance.disks]
11032 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11033 instance.name, pnode, [snode],
11034 disk_info, None, None, 0, feedback_fn)
11035 info = _GetInstanceInfoText(instance)
11036 feedback_fn("Creating aditional volumes...")
11037 # first, create the missing data and meta devices
11038 for disk in new_disks:
11039 # unfortunately this is... not too nice
11040 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11042 for child in disk.children:
11043 _CreateSingleBlockDev(self, snode, instance, child, info, True)
11044 # at this stage, all new LVs have been created, we can rename the
11046 feedback_fn("Renaming original volumes...")
11047 rename_list = [(o, n.children[0].logical_id)
11048 for (o, n) in zip(instance.disks, new_disks)]
11049 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11050 result.Raise("Failed to rename original LVs")
11052 feedback_fn("Initializing DRBD devices...")
11053 # all child devices are in place, we can now create the DRBD devices
11054 for disk in new_disks:
11055 for node in [pnode, snode]:
11056 f_create = node == pnode
11057 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11059 # at this point, the instance has been modified
11060 instance.disk_template = constants.DT_DRBD8
11061 instance.disks = new_disks
11062 self.cfg.Update(instance, feedback_fn)
11064 # disks are created, waiting for sync
11065 disk_abort = not _WaitForSync(self, instance,
11066 oneshot=not self.op.wait_for_sync)
11068 raise errors.OpExecError("There are some degraded disks for"
11069 " this instance, please cleanup manually")
11071 def _ConvertDrbdToPlain(self, feedback_fn):
11072 """Converts an instance from drbd to plain.
11075 instance = self.instance
11076 assert len(instance.secondary_nodes) == 1
11077 pnode = instance.primary_node
11078 snode = instance.secondary_nodes[0]
11079 feedback_fn("Converting template to plain")
11081 old_disks = instance.disks
11082 new_disks = [d.children[0] for d in old_disks]
11084 # copy over size and mode
11085 for parent, child in zip(old_disks, new_disks):
11086 child.size = parent.size
11087 child.mode = parent.mode
11089 # update instance structure
11090 instance.disks = new_disks
11091 instance.disk_template = constants.DT_PLAIN
11092 self.cfg.Update(instance, feedback_fn)
11094 feedback_fn("Removing volumes on the secondary node...")
11095 for disk in old_disks:
11096 self.cfg.SetDiskID(disk, snode)
11097 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11099 self.LogWarning("Could not remove block device %s on node %s,"
11100 " continuing anyway: %s", disk.iv_name, snode, msg)
11102 feedback_fn("Removing unneeded volumes on the primary node...")
11103 for idx, disk in enumerate(old_disks):
11104 meta = disk.children[1]
11105 self.cfg.SetDiskID(meta, pnode)
11106 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11108 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11109 " continuing anyway: %s", idx, pnode, msg)
11111 def Exec(self, feedback_fn):
11112 """Modifies an instance.
11114 All parameters take effect only at the next restart of the instance.
11117 # Process the warnings from CheckPrereq here, as we don't have a
11118 # feedback_fn there.
11119 for warn in self.warn:
11120 feedback_fn("WARNING: %s" % warn)
11123 instance = self.instance
11125 for disk_op, disk_dict in self.op.disks:
11126 if disk_op == constants.DDM_REMOVE:
11127 # remove the last disk
11128 device = instance.disks.pop()
11129 device_idx = len(instance.disks)
11130 for node, disk in device.ComputeNodeTree(instance.primary_node):
11131 self.cfg.SetDiskID(disk, node)
11132 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11134 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11135 " continuing anyway", device_idx, node, msg)
11136 result.append(("disk/%d" % device_idx, "remove"))
11137 elif disk_op == constants.DDM_ADD:
11139 if instance.disk_template in (constants.DT_FILE,
11140 constants.DT_SHARED_FILE):
11141 file_driver, file_path = instance.disks[0].logical_id
11142 file_path = os.path.dirname(file_path)
11144 file_driver = file_path = None
11145 disk_idx_base = len(instance.disks)
11146 new_disk = _GenerateDiskTemplate(self,
11147 instance.disk_template,
11148 instance.name, instance.primary_node,
11149 instance.secondary_nodes,
11153 disk_idx_base, feedback_fn)[0]
11154 instance.disks.append(new_disk)
11155 info = _GetInstanceInfoText(instance)
11157 logging.info("Creating volume %s for instance %s",
11158 new_disk.iv_name, instance.name)
11159 # Note: this needs to be kept in sync with _CreateDisks
11161 for node in instance.all_nodes:
11162 f_create = node == instance.primary_node
11164 _CreateBlockDev(self, node, instance, new_disk,
11165 f_create, info, f_create)
11166 except errors.OpExecError, err:
11167 self.LogWarning("Failed to create volume %s (%s) on"
11169 new_disk.iv_name, new_disk, node, err)
11170 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11171 (new_disk.size, new_disk.mode)))
11173 # change a given disk
11174 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11175 result.append(("disk.mode/%d" % disk_op,
11176 disk_dict[constants.IDISK_MODE]))
11178 if self.op.disk_template:
11179 r_shut = _ShutdownInstanceDisks(self, instance)
11181 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11182 " proceed with disk template conversion")
11183 mode = (instance.disk_template, self.op.disk_template)
11185 self._DISK_CONVERSIONS[mode](self, feedback_fn)
11187 self.cfg.ReleaseDRBDMinors(instance.name)
11189 result.append(("disk_template", self.op.disk_template))
11192 for nic_op, nic_dict in self.op.nics:
11193 if nic_op == constants.DDM_REMOVE:
11194 # remove the last nic
11195 del instance.nics[-1]
11196 result.append(("nic.%d" % len(instance.nics), "remove"))
11197 elif nic_op == constants.DDM_ADD:
11198 # mac and bridge should be set by now
11199 mac = nic_dict[constants.INIC_MAC]
11200 ip = nic_dict.get(constants.INIC_IP, None)
11201 nicparams = self.nic_pinst[constants.DDM_ADD]
11202 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11203 instance.nics.append(new_nic)
11204 result.append(("nic.%d" % (len(instance.nics) - 1),
11205 "add:mac=%s,ip=%s,mode=%s,link=%s" %
11206 (new_nic.mac, new_nic.ip,
11207 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11208 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11211 for key in (constants.INIC_MAC, constants.INIC_IP):
11212 if key in nic_dict:
11213 setattr(instance.nics[nic_op], key, nic_dict[key])
11214 if nic_op in self.nic_pinst:
11215 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11216 for key, val in nic_dict.iteritems():
11217 result.append(("nic.%s/%d" % (key, nic_op), val))
11220 if self.op.hvparams:
11221 instance.hvparams = self.hv_inst
11222 for key, val in self.op.hvparams.iteritems():
11223 result.append(("hv/%s" % key, val))
11226 if self.op.beparams:
11227 instance.beparams = self.be_inst
11228 for key, val in self.op.beparams.iteritems():
11229 result.append(("be/%s" % key, val))
11232 if self.op.os_name:
11233 instance.os = self.op.os_name
11236 if self.op.osparams:
11237 instance.osparams = self.os_inst
11238 for key, val in self.op.osparams.iteritems():
11239 result.append(("os/%s" % key, val))
11241 self.cfg.Update(instance, feedback_fn)
11245 _DISK_CONVERSIONS = {
11246 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11247 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11248 }
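# Added note (not part of the original code): Exec() dispatches disk template
# conversions through the mapping above. The key is the (current, requested)
# template pair and the value is the conversion method, invoked unbound with
# the LU itself and the feedback function:
#
#   mode = (instance.disk_template, self.op.disk_template)
#   self._DISK_CONVERSIONS[mode](self, feedback_fn)
#
# Template pairs without an entry here cannot be converted by this LU.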
11251 class LUInstanceChangeGroup(LogicalUnit):
11252 HPATH = "instance-change-group"
11253 HTYPE = constants.HTYPE_INSTANCE
11256 def ExpandNames(self):
11257 self.share_locks = _ShareAll()
11258 self.needed_locks = {
11259 locking.LEVEL_NODEGROUP: [],
11260 locking.LEVEL_NODE: [],
11263 self._ExpandAndLockInstance()
11265 if self.op.target_groups:
11266 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11267 self.op.target_groups)
11269 self.req_target_uuids = None
11271 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11273 def DeclareLocks(self, level):
11274 if level == locking.LEVEL_NODEGROUP:
11275 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11277 if self.req_target_uuids:
11278 lock_groups = set(self.req_target_uuids)
11280 # Lock all groups used by instance optimistically; this requires going
11281 # via the node before it's locked, requiring verification later on
11282 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11283 lock_groups.update(instance_groups)
11285 # No target groups, need to lock all of them
11286 lock_groups = locking.ALL_SET
11288 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11290 elif level == locking.LEVEL_NODE:
11291 if self.req_target_uuids:
11292 # Lock all nodes used by instances
11293 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11294 self._LockInstancesNodes()
11296 # Lock all nodes in all potential target groups
11297 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11298 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11299 member_nodes = [node_name
11300 for group in lock_groups
11301 for node_name in self.cfg.GetNodeGroup(group).members]
11302 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11304 # Lock all nodes as all groups are potential targets
11305 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
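# Added note: the node group locks above are acquired "optimistically", i.e.
# based on configuration data read before the node locks are held. CheckPrereq
# below therefore re-derives the instance's groups and checks that the owned
# locks still cover them, roughly:
#
#   owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
#   assert owned_nodes.issuperset(self.instance.all_nodes)
#
# so that an instance moved between lock declaration and the prerequisite
# check is detected instead of being operated on with stale locks.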
11307 def CheckPrereq(self):
11308 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11309 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11310 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11312 assert (self.req_target_uuids is None or
11313 owned_groups.issuperset(self.req_target_uuids))
11314 assert owned_instances == set([self.op.instance_name])
11316 # Get instance information
11317 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11319 # Check if node groups for locked instance are still correct
11320 assert owned_nodes.issuperset(self.instance.all_nodes), \
11321 ("Instance %s's nodes changed while we kept the lock" %
11322 self.op.instance_name)
11324 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11327 if self.req_target_uuids:
11328 # User requested specific target groups
11329 self.target_uuids = self.req_target_uuids
11331 # All groups except those used by the instance are potential targets
11332 self.target_uuids = owned_groups - inst_groups
11334 conflicting_groups = self.target_uuids & inst_groups
11335 if conflicting_groups:
11336 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11337 " used by the instance '%s'" %
11338 (utils.CommaJoin(conflicting_groups),
11339 self.op.instance_name),
11340 errors.ECODE_INVAL)
11342 if not self.target_uuids:
11343 raise errors.OpPrereqError("There are no possible target groups",
11344 errors.ECODE_INVAL)
11346 def BuildHooksEnv(self):
11347 """Build hooks env.
11350 assert self.target_uuids
11353 "TARGET_GROUPS": " ".join(self.target_uuids),
11356 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11360 def BuildHooksNodes(self):
11361 """Build hooks nodes.
11364 mn = self.cfg.GetMasterNode()
11365 return ([mn], [mn])
11367 def Exec(self, feedback_fn):
11368 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11370 assert instances == [self.op.instance_name], "Instance not locked"
11372 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11373 instances=instances, target_groups=list(self.target_uuids))
11375 ial.Run(self.op.iallocator)
11377 if not ial.success:
11378 raise errors.OpPrereqError("Can't compute solution for changing group of"
11379 " instance '%s' using iallocator '%s': %s" %
11380 (self.op.instance_name, self.op.iallocator,
11381 ial.info),
11382 errors.ECODE_NORES)
11384 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11386 self.LogInfo("Iallocator returned %s job(s) for changing group of"
11387 " instance '%s'", len(jobs), self.op.instance_name)
11389 return ResultWithJobs(jobs)
11392 class LUBackupQuery(NoHooksLU):
11393 """Query the exports list
11398 def ExpandNames(self):
11399 self.needed_locks = {}
11400 self.share_locks[locking.LEVEL_NODE] = 1
11401 if not self.op.nodes:
11402 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11404 self.needed_locks[locking.LEVEL_NODE] = \
11405 _GetWantedNodes(self, self.op.nodes)
11407 def Exec(self, feedback_fn):
11408 """Compute the list of all the exported system images.
11411 @return: a dictionary with the structure node->(export-list)
11412 where export-list is a list of the instances exported on
11413 that node.
11416 self.nodes = self.owned_locks(locking.LEVEL_NODE)
11417 rpcresult = self.rpc.call_export_list(self.nodes)
11419 for node in rpcresult:
11420 if rpcresult[node].fail_msg:
11421 result[node] = False
11423 result[node] = rpcresult[node].payload
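# Added note: the resulting dictionary maps each queried node either to False
# (the export list RPC failed for that node) or to the list of instance names
# that have an export stored there; e.g. (hostnames made up for illustration):
#
#   {"node1.example.com": ["inst1.example.com"],
#    "node2.example.com": False}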
11428 class LUBackupPrepare(NoHooksLU):
11429 """Prepares an instance for an export and returns useful information.
11434 def ExpandNames(self):
11435 self._ExpandAndLockInstance()
11437 def CheckPrereq(self):
11438 """Check prerequisites.
11441 instance_name = self.op.instance_name
11443 self.instance = self.cfg.GetInstanceInfo(instance_name)
11444 assert self.instance is not None, \
11445 "Cannot retrieve locked instance %s" % self.op.instance_name
11446 _CheckNodeOnline(self, self.instance.primary_node)
11448 self._cds = _GetClusterDomainSecret()
11450 def Exec(self, feedback_fn):
11451 """Prepares an instance for an export.
11454 instance = self.instance
11456 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11457 salt = utils.GenerateSecret(8)
11459 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11460 result = self.rpc.call_x509_cert_create(instance.primary_node,
11461 constants.RIE_CERT_VALIDITY)
11462 result.Raise("Can't create X509 key and certificate on %s" % result.node)
11464 (name, cert_pem) = result.payload
11466 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11470 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11471 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11473 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11479 class LUBackupExport(LogicalUnit):
11480 """Export an instance to an image in the cluster.
11483 HPATH = "instance-export"
11484 HTYPE = constants.HTYPE_INSTANCE
11487 def CheckArguments(self):
11488 """Check the arguments.
11491 self.x509_key_name = self.op.x509_key_name
11492 self.dest_x509_ca_pem = self.op.destination_x509_ca
11494 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11495 if not self.x509_key_name:
11496 raise errors.OpPrereqError("Missing X509 key name for encryption",
11497 errors.ECODE_INVAL)
11499 if not self.dest_x509_ca_pem:
11500 raise errors.OpPrereqError("Missing destination X509 CA",
11501 errors.ECODE_INVAL)
11503 def ExpandNames(self):
11504 self._ExpandAndLockInstance()
11506 # Lock all nodes for local exports
11507 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11508 # FIXME: lock only instance primary and destination node
11510 # Sad but true, for now we have to lock all nodes, as we don't know where
11511 # the previous export might be, and in this LU we search for it and
11512 # remove it from its current node. In the future we could fix this by:
11513 # - making a tasklet to search (share-lock all), then create the
11514 # new one, then one to remove, after
11515 # - removing the removal operation altogether
11516 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11518 def DeclareLocks(self, level):
11519 """Last minute lock declaration."""
11520 # All nodes are locked anyway, so nothing to do here.
11522 def BuildHooksEnv(self):
11523 """Build hooks env.
11525 This will run on the master, primary node and target node.
11529 "EXPORT_MODE": self.op.mode,
11530 "EXPORT_NODE": self.op.target_node,
11531 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11532 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11533 # TODO: Generic function for boolean env variables
11534 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11537 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11541 def BuildHooksNodes(self):
11542 """Build hooks nodes.
11545 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11547 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11548 nl.append(self.op.target_node)
11552 def CheckPrereq(self):
11553 """Check prerequisites.
11555 This checks that the instance and node names are valid.
11558 instance_name = self.op.instance_name
11560 self.instance = self.cfg.GetInstanceInfo(instance_name)
11561 assert self.instance is not None, \
11562 "Cannot retrieve locked instance %s" % self.op.instance_name
11563 _CheckNodeOnline(self, self.instance.primary_node)
11565 if (self.op.remove_instance and self.instance.admin_up and
11566 not self.op.shutdown):
11567 raise errors.OpPrereqError("Can not remove instance without shutting it"
11570 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11571 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11572 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11573 assert self.dst_node is not None
11575 _CheckNodeOnline(self, self.dst_node.name)
11576 _CheckNodeNotDrained(self, self.dst_node.name)
11579 self.dest_disk_info = None
11580 self.dest_x509_ca = None
11582 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11583 self.dst_node = None
11585 if len(self.op.target_node) != len(self.instance.disks):
11586 raise errors.OpPrereqError(("Received destination information for %s"
11587 " disks, but instance %s has %s disks") %
11588 (len(self.op.target_node), instance_name,
11589 len(self.instance.disks)),
11590 errors.ECODE_INVAL)
11592 cds = _GetClusterDomainSecret()
11594 # Check X509 key name
11596 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11597 except (TypeError, ValueError), err:
11598 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11600 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11601 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11602 errors.ECODE_INVAL)
11604 # Load and verify CA
11606 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11607 except OpenSSL.crypto.Error, err:
11608 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11609 (err, ), errors.ECODE_INVAL)
11611 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11612 if errcode is not None:
11613 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11614 (msg, ), errors.ECODE_INVAL)
11616 self.dest_x509_ca = cert
11618 # Verify target information
11620 for idx, disk_data in enumerate(self.op.target_node):
11622 (host, port, magic) = \
11623 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11624 except errors.GenericError, err:
11625 raise errors.OpPrereqError("Target info for disk %s: %s" %
11626 (idx, err), errors.ECODE_INVAL)
11628 disk_info.append((host, port, magic))
11630 assert len(disk_info) == len(self.op.target_node)
11631 self.dest_disk_info = disk_info
11634 raise errors.ProgrammerError("Unhandled export mode %r" %
11637 # instance disk type verification
11638 # TODO: Implement export support for file-based disks
11639 for disk in self.instance.disks:
11640 if disk.dev_type == constants.LD_FILE:
11641 raise errors.OpPrereqError("Export not supported for instances with"
11642 " file-based disks", errors.ECODE_INVAL)
11644 def _CleanupExports(self, feedback_fn):
11645 """Removes exports of current instance from all other nodes.
11647 If an instance in a cluster with nodes A..D was exported to node C, its
11648 exports will be removed from the nodes A, B and D.
11651 assert self.op.mode != constants.EXPORT_MODE_REMOTE
11653 nodelist = self.cfg.GetNodeList()
11654 nodelist.remove(self.dst_node.name)
11656 # on one-node clusters nodelist will be empty after the removal
11657 # if we proceed the backup would be removed because OpBackupQuery
11658 # substitutes an empty list with the full cluster node list.
11659 iname = self.instance.name
11661 feedback_fn("Removing old exports for instance %s" % iname)
11662 exportlist = self.rpc.call_export_list(nodelist)
11663 for node in exportlist:
11664 if exportlist[node].fail_msg:
11666 if iname in exportlist[node].payload:
11667 msg = self.rpc.call_export_remove(node, iname).fail_msg
11669 self.LogWarning("Could not remove older export for instance %s"
11670 " on node %s: %s", iname, node, msg)
11672 def Exec(self, feedback_fn):
11673 """Export an instance to an image in the cluster.
11676 assert self.op.mode in constants.EXPORT_MODES
11678 instance = self.instance
11679 src_node = instance.primary_node
11681 if self.op.shutdown:
11682 # shutdown the instance, but not the disks
11683 feedback_fn("Shutting down instance %s" % instance.name)
11684 result = self.rpc.call_instance_shutdown(src_node, instance,
11685 self.op.shutdown_timeout)
11686 # TODO: Maybe ignore failures if ignore_remove_failures is set
11687 result.Raise("Could not shutdown instance %s on"
11688 " node %s" % (instance.name, src_node))
11690 # set the disks ID correctly since call_instance_start needs the
11691 # correct drbd minor to create the symlinks
11692 for disk in instance.disks:
11693 self.cfg.SetDiskID(disk, src_node)
11695 activate_disks = (not instance.admin_up)
11698 # Activate the instance disks if we're exporting a stopped instance
11699 feedback_fn("Activating disks for %s" % instance.name)
11700 _StartInstanceDisks(self, instance, None)
11703 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11706 helper.CreateSnapshots()
11708 if (self.op.shutdown and instance.admin_up and
11709 not self.op.remove_instance):
11710 assert not activate_disks
11711 feedback_fn("Starting instance %s" % instance.name)
11712 result = self.rpc.call_instance_start(src_node, instance,
11714 msg = result.fail_msg
11716 feedback_fn("Failed to start instance: %s" % msg)
11717 _ShutdownInstanceDisks(self, instance)
11718 raise errors.OpExecError("Could not start instance: %s" % msg)
11720 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11721 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11722 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11723 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11724 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11726 (key_name, _, _) = self.x509_key_name
11729 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11732 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11733 key_name, dest_ca_pem,
11738 # Check for backwards compatibility
11739 assert len(dresults) == len(instance.disks)
11740 assert compat.all(isinstance(i, bool) for i in dresults), \
11741 "Not all results are boolean: %r" % dresults
11745 feedback_fn("Deactivating disks for %s" % instance.name)
11746 _ShutdownInstanceDisks(self, instance)
11748 if not (compat.all(dresults) and fin_resu):
11751 failures.append("export finalization")
11752 if not compat.all(dresults):
11753 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11754 if not dsk)
11755 failures.append("disk export: disk(s) %s" % fdsk)
11757 raise errors.OpExecError("Export failed, errors in %s" %
11758 utils.CommaJoin(failures))
11760 # At this point, the export was successful, we can cleanup/finish
11762 # Remove instance if requested
11763 if self.op.remove_instance:
11764 feedback_fn("Removing instance %s" % instance.name)
11765 _RemoveInstance(self, feedback_fn, instance,
11766 self.op.ignore_remove_failures)
11768 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11769 self._CleanupExports(feedback_fn)
11771 return fin_resu, dresults
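# Added note: Exec() returns a (fin_resu, dresults) pair: fin_resu is the
# overall finalization status and dresults contains one boolean per instance
# disk (asserted above to match len(instance.disks)). A hypothetical caller
# could summarize a result such as (True, [True, False]) with:
#
#   failed = [idx for (idx, ok) in enumerate(dresults) if not ok]
#
# where the indices in "failed" are the disks whose export did not complete.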
11774 class LUBackupRemove(NoHooksLU):
11775 """Remove exports related to the named instance.
11780 def ExpandNames(self):
11781 self.needed_locks = {}
11782 # We need all nodes to be locked in order for RemoveExport to work, but we
11783 # don't need to lock the instance itself, as nothing will happen to it (and
11784 # we can remove exports also for a removed instance)
11785 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11787 def Exec(self, feedback_fn):
11788 """Remove any export.
11791 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11792 # If the instance was not found we'll try with the name that was passed in.
11793 # This will only work if it was an FQDN, though.
11794 fqdn_warn = False
11795 if not instance_name:
11796 fqdn_warn = True
11797 instance_name = self.op.instance_name
11799 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11800 exportlist = self.rpc.call_export_list(locked_nodes)
11802 for node in exportlist:
11803 msg = exportlist[node].fail_msg
11805 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11807 if instance_name in exportlist[node].payload:
11809 result = self.rpc.call_export_remove(node, instance_name)
11810 msg = result.fail_msg
11812 logging.error("Could not remove export for instance %s"
11813 " on node %s: %s", instance_name, node, msg)
11815 if fqdn_warn and not found:
11816 feedback_fn("Export not found. If trying to remove an export belonging"
11817 " to a deleted instance please use its Fully Qualified"
11821 class LUGroupAdd(LogicalUnit):
11822 """Logical unit for creating node groups.
11825 HPATH = "group-add"
11826 HTYPE = constants.HTYPE_GROUP
11829 def ExpandNames(self):
11830 # We need the new group's UUID here so that we can create and acquire the
11831 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11832 # that it should not check whether the UUID exists in the configuration.
11833 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11834 self.needed_locks = {}
11835 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11837 def CheckPrereq(self):
11838 """Check prerequisites.
11840 This checks that the given group name is not an existing node group
11844 try:
11845 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11846 except errors.OpPrereqError:
11847 pass
11848 else:
11849 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11850 " node group (UUID: %s)" %
11851 (self.op.group_name, existing_uuid),
11852 errors.ECODE_EXISTS)
11854 if self.op.ndparams:
11855 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11857 def BuildHooksEnv(self):
11858 """Build hooks env.
11862 "GROUP_NAME": self.op.group_name,
11865 def BuildHooksNodes(self):
11866 """Build hooks nodes.
11869 mn = self.cfg.GetMasterNode()
11870 return ([mn], [mn])
11872 def Exec(self, feedback_fn):
11873 """Add the node group to the cluster.
11876 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11877 uuid=self.group_uuid,
11878 alloc_policy=self.op.alloc_policy,
11879 ndparams=self.op.ndparams)
11881 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11882 del self.remove_locks[locking.LEVEL_NODEGROUP]
11885 class LUGroupAssignNodes(NoHooksLU):
11886 """Logical unit for assigning nodes to groups.
11891 def ExpandNames(self):
11892 # These raise errors.OpPrereqError on their own:
11893 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11894 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11896 # We want to lock all the affected nodes and groups. We have readily
11897 # available the list of nodes, and the *destination* group. To gather the
11898 # list of "source" groups, we need to fetch node information later on.
11899 self.needed_locks = {
11900 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11901 locking.LEVEL_NODE: self.op.nodes,
11904 def DeclareLocks(self, level):
11905 if level == locking.LEVEL_NODEGROUP:
11906 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11908 # Try to get all affected nodes' groups without having the group or node
11909 # lock yet. Needs verification later in the code flow.
11910 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11912 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11914 def CheckPrereq(self):
11915 """Check prerequisites.
11918 assert self.needed_locks[locking.LEVEL_NODEGROUP]
11919 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
11920 frozenset(self.op.nodes))
11922 expected_locks = (set([self.group_uuid]) |
11923 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11924 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
11925 if actual_locks != expected_locks:
11926 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11927 " current groups are '%s', used to be '%s'" %
11928 (utils.CommaJoin(expected_locks),
11929 utils.CommaJoin(actual_locks)))
11931 self.node_data = self.cfg.GetAllNodesInfo()
11932 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11933 instance_data = self.cfg.GetAllInstancesInfo()
11935 if self.group is None:
11936 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11937 (self.op.group_name, self.group_uuid))
11939 (new_splits, previous_splits) = \
11940 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11941 for node in self.op.nodes],
11942 self.node_data, instance_data)
11945 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11947 if not self.op.force:
11948 raise errors.OpExecError("The following instances get split by this"
11949 " change and --force was not given: %s" %
11952 self.LogWarning("This operation will split the following instances: %s",
11955 if previous_splits:
11956 self.LogWarning("In addition, these already-split instances continue"
11957 " to be split across groups: %s",
11958 utils.CommaJoin(utils.NiceSort(previous_splits)))
11960 def Exec(self, feedback_fn):
11961 """Assign nodes to a new group.
11964 for node in self.op.nodes:
11965 self.node_data[node].group = self.group_uuid
11967 # FIXME: Depends on side-effects of modifying the result of
11968 # C{cfg.GetAllNodesInfo}
11970 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11973 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11974 """Check for split instances after a node assignment.
11976 This method considers a series of node assignments as an atomic operation,
11977 and returns information about split instances after applying the set of
11978 changes.
11980 In particular, it returns information about newly split instances, and
11981 instances that were already split, and remain so after the change.
11983 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11984 considered.
11986 @type changes: list of (node_name, new_group_uuid) pairs.
11987 @param changes: list of node assignments to consider.
11988 @param node_data: a dict with data for all nodes
11989 @param instance_data: a dict with all instances to consider
11990 @rtype: a two-tuple
11991 @return: a list of instances that were previously okay and result split as a
11992 consequence of this change, and a list of instances that were previously
11993 split and this change does not fix.
11996 changed_nodes = dict((node, group) for node, group in changes
11997 if node_data[node].group != group)
11999 all_split_instances = set()
12000 previously_split_instances = set()
12002 def InstanceNodes(instance):
12003 return [instance.primary_node] + list(instance.secondary_nodes)
12005 for inst in instance_data.values():
12006 if inst.disk_template not in constants.DTS_INT_MIRROR:
12009 instance_nodes = InstanceNodes(inst)
12011 if len(set(node_data[node].group for node in instance_nodes)) > 1:
12012 previously_split_instances.add(inst.name)
12014 if len(set(changed_nodes.get(node, node_data[node].group)
12015 for node in instance_nodes)) > 1:
12016 all_split_instances.add(inst.name)
12018 return (list(all_split_instances - previously_split_instances),
12019 list(previously_split_instances & all_split_instances))
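# Worked example (added; node, group and instance names are made up): assume
# node_data maps "n1" and "n2" both to group "g1", and a DRBD instance "inst1"
# has primary "n1" and secondary "n2". Then:
#
#   CheckAssignmentForSplitInstances([("n2", "g2")], node_data, instance_data)
#
# returns (["inst1"], []): the instance was not split before, but moving "n2"
# into "g2" leaves its nodes in different groups. Conversely, if "n2" already
# belonged to "g2", moving it back to "g1" would return ([], []), since the
# change repairs the pre-existing split.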
12022 class _GroupQuery(_QueryBase):
12023 FIELDS = query.GROUP_FIELDS
12025 def ExpandNames(self, lu):
12026 lu.needed_locks = {}
12028 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12029 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12032 self.wanted = [name_to_uuid[name]
12033 for name in utils.NiceSort(name_to_uuid.keys())]
12035 # Accept names to be either names or UUIDs.
12038 all_uuid = frozenset(self._all_groups.keys())
12040 for name in self.names:
12041 if name in all_uuid:
12042 self.wanted.append(name)
12043 elif name in name_to_uuid:
12044 self.wanted.append(name_to_uuid[name])
12046 missing.append(name)
12049 raise errors.OpPrereqError("Some groups do not exist: %s" %
12050 utils.CommaJoin(missing),
12051 errors.ECODE_NOENT)
12053 def DeclareLocks(self, lu, level):
12056 def _GetQueryData(self, lu):
12057 """Computes the list of node groups and their attributes.
12060 do_nodes = query.GQ_NODE in self.requested_data
12061 do_instances = query.GQ_INST in self.requested_data
12063 group_to_nodes = None
12064 group_to_instances = None
12066 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12067 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12068 # latter GetAllInstancesInfo() is not enough, for we have to go through
12069 # instance->node. Hence, we will need to process nodes even if we only need
12070 # instance information.
12071 if do_nodes or do_instances:
12072 all_nodes = lu.cfg.GetAllNodesInfo()
12073 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12076 for node in all_nodes.values():
12077 if node.group in group_to_nodes:
12078 group_to_nodes[node.group].append(node.name)
12079 node_to_group[node.name] = node.group
12082 all_instances = lu.cfg.GetAllInstancesInfo()
12083 group_to_instances = dict((uuid, []) for uuid in self.wanted)
12085 for instance in all_instances.values():
12086 node = instance.primary_node
12087 if node in node_to_group:
12088 group_to_instances[node_to_group[node]].append(instance.name)
12091 # Do not pass on node information if it was not requested.
12092 group_to_nodes = None
12094 return query.GroupQueryData([self._all_groups[uuid]
12095 for uuid in self.wanted],
12096 group_to_nodes, group_to_instances)
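# Added note: when node and/or instance data is requested, the mappings built
# above have the following shape (UUIDs and names are illustrative only):
#
#   group_to_nodes     = {"<group-uuid>": ["node1", "node2"]}
#   group_to_instances = {"<group-uuid>": ["inst1"]}
#
# Instances are attributed to the group of their primary node, which is why
# node information must be processed even when only GQ_INST was requested.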
12099 class LUGroupQuery(NoHooksLU):
12100 """Logical unit for querying node groups.
12105 def CheckArguments(self):
12106 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12107 self.op.output_fields, False)
12109 def ExpandNames(self):
12110 self.gq.ExpandNames(self)
12112 def DeclareLocks(self, level):
12113 self.gq.DeclareLocks(self, level)
12115 def Exec(self, feedback_fn):
12116 return self.gq.OldStyleQuery(self)
12119 class LUGroupSetParams(LogicalUnit):
12120 """Modifies the parameters of a node group.
12123 HPATH = "group-modify"
12124 HTYPE = constants.HTYPE_GROUP
12127 def CheckArguments(self):
12128 all_changes = [
12129 self.op.ndparams,
12130 self.op.alloc_policy,
12131 ]
12133 if all_changes.count(None) == len(all_changes):
12134 raise errors.OpPrereqError("Please pass at least one modification",
12135 errors.ECODE_INVAL)
12137 def ExpandNames(self):
12138 # This raises errors.OpPrereqError on its own:
12139 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12141 self.needed_locks = {
12142 locking.LEVEL_NODEGROUP: [self.group_uuid],
12145 def CheckPrereq(self):
12146 """Check prerequisites.
12149 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12151 if self.group is None:
12152 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12153 (self.op.group_name, self.group_uuid))
12155 if self.op.ndparams:
12156 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12157 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12158 self.new_ndparams = new_ndparams
12160 def BuildHooksEnv(self):
12161 """Build hooks env.
12165 "GROUP_NAME": self.op.group_name,
12166 "NEW_ALLOC_POLICY": self.op.alloc_policy,
12169 def BuildHooksNodes(self):
12170 """Build hooks nodes.
12173 mn = self.cfg.GetMasterNode()
12174 return ([mn], [mn])
12176 def Exec(self, feedback_fn):
12177 """Modifies the node group.
12182 if self.op.ndparams:
12183 self.group.ndparams = self.new_ndparams
12184 result.append(("ndparams", str(self.group.ndparams)))
12186 if self.op.alloc_policy:
12187 self.group.alloc_policy = self.op.alloc_policy
12189 self.cfg.Update(self.group, feedback_fn)
12193 class LUGroupRemove(LogicalUnit):
12194 HPATH = "group-remove"
12195 HTYPE = constants.HTYPE_GROUP
12198 def ExpandNames(self):
12199 # This raises errors.OpPrereqError on its own:
12200 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12201 self.needed_locks = {
12202 locking.LEVEL_NODEGROUP: [self.group_uuid],
12205 def CheckPrereq(self):
12206 """Check prerequisites.
12208 This checks that the given group name exists as a node group, that it is
12209 empty (i.e., contains no nodes), and that it is not the last group of the
12210 cluster.
12213 # Verify that the group is empty.
12214 group_nodes = [node.name
12215 for node in self.cfg.GetAllNodesInfo().values()
12216 if node.group == self.group_uuid]
12219 raise errors.OpPrereqError("Group '%s' not empty, has the following"
12221 (self.op.group_name,
12222 utils.CommaJoin(utils.NiceSort(group_nodes))),
12223 errors.ECODE_STATE)
12225 # Verify the cluster would not be left group-less.
12226 if len(self.cfg.GetNodeGroupList()) == 1:
12227 raise errors.OpPrereqError("Group '%s' is the only group,"
12228 " cannot be removed" %
12229 self.op.group_name,
12230 errors.ECODE_STATE)
12232 def BuildHooksEnv(self):
12233 """Build hooks env.
12237 "GROUP_NAME": self.op.group_name,
12240 def BuildHooksNodes(self):
12241 """Build hooks nodes.
12244 mn = self.cfg.GetMasterNode()
12245 return ([mn], [mn])
12247 def Exec(self, feedback_fn):
12248 """Remove the node group.
12252 self.cfg.RemoveNodeGroup(self.group_uuid)
12253 except errors.ConfigurationError:
12254 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12255 (self.op.group_name, self.group_uuid))
12257 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12260 class LUGroupRename(LogicalUnit):
12261 HPATH = "group-rename"
12262 HTYPE = constants.HTYPE_GROUP
12265 def ExpandNames(self):
12266 # This raises errors.OpPrereqError on its own:
12267 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12269 self.needed_locks = {
12270 locking.LEVEL_NODEGROUP: [self.group_uuid],
12273 def CheckPrereq(self):
12274 """Check prerequisites.
12276 Ensures requested new name is not yet used.
12279 try:
12280 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12281 except errors.OpPrereqError:
12282 pass
12283 else:
12284 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12285 " node group (UUID: %s)" %
12286 (self.op.new_name, new_name_uuid),
12287 errors.ECODE_EXISTS)
12289 def BuildHooksEnv(self):
12290 """Build hooks env.
12294 "OLD_NAME": self.op.group_name,
12295 "NEW_NAME": self.op.new_name,
12298 def BuildHooksNodes(self):
12299 """Build hooks nodes.
12302 mn = self.cfg.GetMasterNode()
12304 all_nodes = self.cfg.GetAllNodesInfo()
12305 all_nodes.pop(mn, None)
12308 run_nodes.extend(node.name for node in all_nodes.values()
12309 if node.group == self.group_uuid)
12311 return (run_nodes, run_nodes)
12313 def Exec(self, feedback_fn):
12314 """Rename the node group.
12317 group = self.cfg.GetNodeGroup(self.group_uuid)
12320 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12321 (self.op.group_name, self.group_uuid))
12323 group.name = self.op.new_name
12324 self.cfg.Update(group, feedback_fn)
12326 return self.op.new_name
12329 class LUGroupEvacuate(LogicalUnit):
12330 HPATH = "group-evacuate"
12331 HTYPE = constants.HTYPE_GROUP
12334 def ExpandNames(self):
12335 # This raises errors.OpPrereqError on its own:
12336 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12338 if self.op.target_groups:
12339 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12340 self.op.target_groups)
12342 self.req_target_uuids = []
12344 if self.group_uuid in self.req_target_uuids:
12345 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12346 " as a target group (targets are %s)" %
12348 utils.CommaJoin(self.req_target_uuids)),
12349 errors.ECODE_INVAL)
12351 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12353 self.share_locks = _ShareAll()
12354 self.needed_locks = {
12355 locking.LEVEL_INSTANCE: [],
12356 locking.LEVEL_NODEGROUP: [],
12357 locking.LEVEL_NODE: [],
12360 def DeclareLocks(self, level):
12361 if level == locking.LEVEL_INSTANCE:
12362 assert not self.needed_locks[locking.LEVEL_INSTANCE]
12364 # Lock instances optimistically, needs verification once node and group
12365 # locks have been acquired
12366 self.needed_locks[locking.LEVEL_INSTANCE] = \
12367 self.cfg.GetNodeGroupInstances(self.group_uuid)
12369 elif level == locking.LEVEL_NODEGROUP:
12370 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12372 if self.req_target_uuids:
12373 lock_groups = set([self.group_uuid] + self.req_target_uuids)
12375 # Lock all groups used by instances optimistically; this requires going
12376 # via the node before it's locked, requiring verification later on
12377 lock_groups.update(group_uuid
12378 for instance_name in
12379 self.owned_locks(locking.LEVEL_INSTANCE)
12381 self.cfg.GetInstanceNodeGroups(instance_name))
12383 # No target groups, need to lock all of them
12384 lock_groups = locking.ALL_SET
12386 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12388 elif level == locking.LEVEL_NODE:
12389 # This will only lock the nodes in the group to be evacuated which
12390 # contain actual instances
12391 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12392 self._LockInstancesNodes()
12394 # Lock all nodes in group to be evacuated and target groups
12395 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12396 assert self.group_uuid in owned_groups
12397 member_nodes = [node_name
12398 for group in owned_groups
12399 for node_name in self.cfg.GetNodeGroup(group).members]
12400 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12402 def CheckPrereq(self):
12403 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12404 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12405 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12407 assert owned_groups.issuperset(self.req_target_uuids)
12408 assert self.group_uuid in owned_groups
12410 # Check if locked instances are still correct
12411 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12413 # Get instance information
12414 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12416 # Check if node groups for locked instances are still correct
12417 for instance_name in owned_instances:
12418 inst = self.instances[instance_name]
12419 assert owned_nodes.issuperset(inst.all_nodes), \
12420 "Instance %s's nodes changed while we kept the lock" % instance_name
12422 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12425 assert self.group_uuid in inst_groups, \
12426 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12428 if self.req_target_uuids:
12429 # User requested specific target groups
12430 self.target_uuids = self.req_target_uuids
12432 # All groups except the one to be evacuated are potential targets
12433 self.target_uuids = [group_uuid for group_uuid in owned_groups
12434 if group_uuid != self.group_uuid]
12436 if not self.target_uuids:
12437 raise errors.OpPrereqError("There are no possible target groups",
12438 errors.ECODE_INVAL)
12440 def BuildHooksEnv(self):
12441 """Build hooks env.
12445 "GROUP_NAME": self.op.group_name,
12446 "TARGET_GROUPS": " ".join(self.target_uuids),
12449 def BuildHooksNodes(self):
12450 """Build hooks nodes.
12453 mn = self.cfg.GetMasterNode()
12455 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12457 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12459 return (run_nodes, run_nodes)
12461 def Exec(self, feedback_fn):
12462 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12464 assert self.group_uuid not in self.target_uuids
12466 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12467 instances=instances, target_groups=self.target_uuids)
12469 ial.Run(self.op.iallocator)
12471 if not ial.success:
12472 raise errors.OpPrereqError("Can't compute group evacuation using"
12473 " iallocator '%s': %s" %
12474 (self.op.iallocator, ial.info),
12475 errors.ECODE_NORES)
12477 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12479 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12480 len(jobs), self.op.group_name)
12482 return ResultWithJobs(jobs)
12485 class TagsLU(NoHooksLU): # pylint: disable=W0223
12486 """Generic tags LU.
12488 This is an abstract class which is the parent of all the other tags LUs.
12491 def ExpandNames(self):
12492 self.group_uuid = None
12493 self.needed_locks = {}
12494 if self.op.kind == constants.TAG_NODE:
12495 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12496 self.needed_locks[locking.LEVEL_NODE] = self.op.name
12497 elif self.op.kind == constants.TAG_INSTANCE:
12498 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12499 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12500 elif self.op.kind == constants.TAG_NODEGROUP:
12501 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12503 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12504 # not possible to acquire the BGL based on opcode parameters)
12506 def CheckPrereq(self):
12507 """Check prerequisites.
12510 if self.op.kind == constants.TAG_CLUSTER:
12511 self.target = self.cfg.GetClusterInfo()
12512 elif self.op.kind == constants.TAG_NODE:
12513 self.target = self.cfg.GetNodeInfo(self.op.name)
12514 elif self.op.kind == constants.TAG_INSTANCE:
12515 self.target = self.cfg.GetInstanceInfo(self.op.name)
12516 elif self.op.kind == constants.TAG_NODEGROUP:
12517 self.target = self.cfg.GetNodeGroup(self.group_uuid)
12519 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12520 str(self.op.kind), errors.ECODE_INVAL)
12523 class LUTagsGet(TagsLU):
12524 """Returns the tags of a given object.
12529 def ExpandNames(self):
12530 TagsLU.ExpandNames(self)
12532 # Share locks as this is only a read operation
12533 self.share_locks = _ShareAll()
12535 def Exec(self, feedback_fn):
12536 """Returns the tag list.
12539 return list(self.target.GetTags())
12542 class LUTagsSearch(NoHooksLU):
12543 """Searches the tags for a given pattern.
12548 def ExpandNames(self):
12549 self.needed_locks = {}
12551 def CheckPrereq(self):
12552 """Check prerequisites.
12554 This checks the pattern passed for validity by compiling it.
12558 self.re = re.compile(self.op.pattern)
12559 except re.error, err:
12560 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12561 (self.op.pattern, err), errors.ECODE_INVAL)
12563 def Exec(self, feedback_fn):
12564 """Returns the tag list.
12568 tgts = [("/cluster", cfg.GetClusterInfo())]
12569 ilist = cfg.GetAllInstancesInfo().values()
12570 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12571 nlist = cfg.GetAllNodesInfo().values()
12572 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12573 tgts.extend(("/nodegroup/%s" % n.name, n)
12574 for n in cfg.GetAllNodeGroupsInfo().values())
12576 for path, target in tgts:
12577 for tag in target.GetTags():
12578 if self.re.search(tag):
12579 results.append((path, tag))
12583 class LUTagsSet(TagsLU):
12584 """Sets a tag on a given object.
12589 def CheckPrereq(self):
12590 """Check prerequisites.
12592 This checks the type and length of the tag name and value.
12595 TagsLU.CheckPrereq(self)
12596 for tag in self.op.tags:
12597 objects.TaggableObject.ValidateTag(tag)
12599 def Exec(self, feedback_fn):
12604 for tag in self.op.tags:
12605 self.target.AddTag(tag)
12606 except errors.TagError, err:
12607 raise errors.OpExecError("Error while setting tag: %s" % str(err))
12608 self.cfg.Update(self.target, feedback_fn)
12611 class LUTagsDel(TagsLU):
12612 """Delete a list of tags from a given object.
12617 def CheckPrereq(self):
12618 """Check prerequisites.
12620 This checks that we have the given tag.
12623 TagsLU.CheckPrereq(self)
12624 for tag in self.op.tags:
12625 objects.TaggableObject.ValidateTag(tag)
12626 del_tags = frozenset(self.op.tags)
12627 cur_tags = self.target.GetTags()
12629 diff_tags = del_tags - cur_tags
12631 diff_names = ("'%s'" % i for i in sorted(diff_tags))
12632 raise errors.OpPrereqError("Tag(s) %s not found" %
12633 (utils.CommaJoin(diff_names), ),
12634 errors.ECODE_NOENT)
12636 def Exec(self, feedback_fn):
12637 """Remove the tag from the object.
12640 for tag in self.op.tags:
12641 self.target.RemoveTag(tag)
12642 self.cfg.Update(self.target, feedback_fn)
12645 class LUTestDelay(NoHooksLU):
12646 """Sleep for a specified amount of time.
12648 This LU sleeps on the master and/or nodes for a specified amount of
12649 time.
12654 def ExpandNames(self):
12655 """Expand names and set required locks.
12657 This expands the node list, if any.
12660 self.needed_locks = {}
12661 if self.op.on_nodes:
12662 # _GetWantedNodes can be used here, but is not always appropriate to use
12663 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12664 # more information.
12665 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12666 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12668 def _TestDelay(self):
12669 """Do the actual sleep.
12672 if self.op.on_master:
12673 if not utils.TestDelay(self.op.duration):
12674 raise errors.OpExecError("Error during master delay test")
12675 if self.op.on_nodes:
12676 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12677 for node, node_result in result.items():
12678 node_result.Raise("Failure during rpc call to node %s" % node)
12680 def Exec(self, feedback_fn):
12681 """Execute the test delay opcode, with the wanted repetitions.
12684 if self.op.repeat == 0:
12687 top_value = self.op.repeat - 1
12688 for i in range(self.op.repeat):
12689 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12693 class LUTestJqueue(NoHooksLU):
12694 """Utility LU to test some aspects of the job queue.
12699 # Must be lower than default timeout for WaitForJobChange to see whether it
12700 # notices changed jobs
12701 _CLIENT_CONNECT_TIMEOUT = 20.0
12702 _CLIENT_CONFIRM_TIMEOUT = 60.0
12705 def _NotifyUsingSocket(cls, cb, errcls):
12706 """Opens a Unix socket and waits for another program to connect.
12709 @param cb: Callback to send socket name to client
12710 @type errcls: class
12711 @param errcls: Exception class to use for errors
12714 # Using a temporary directory as there's no easy way to create temporary
12715 # sockets without writing a custom loop around tempfile.mktemp and
12716 # socket.bind
12717 tmpdir = tempfile.mkdtemp()
12719 tmpsock = utils.PathJoin(tmpdir, "sock")
12721 logging.debug("Creating temporary socket at %s", tmpsock)
12722 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12727 # Send details to client
12730 # Wait for client to connect before continuing
12731 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12733 (conn, _) = sock.accept()
12734 except socket.error, err:
12735 raise errcls("Client didn't connect in time (%s)" % err)
12739 # Remove as soon as client is connected
12740 shutil.rmtree(tmpdir)
12742 # Wait for client to close
12745 # pylint: disable=E1101
12746 # Instance of '_socketobject' has no ... member
12747 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12749 except socket.error, err:
12750 raise errcls("Client failed to confirm notification (%s)" % err)
12754 def _SendNotification(self, test, arg, sockname):
12755 """Sends a notification to the client.
12758 @param test: Test name
12759 @param arg: Test argument (depends on test)
12760 @type sockname: string
12761 @param sockname: Socket path
12764 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12766 def _Notify(self, prereq, test, arg):
12767 """Notifies the client of a test.
12770 @param prereq: Whether this is a prereq-phase test
12772 @param test: Test name
12773 @param arg: Test argument (depends on test)
12777 errcls = errors.OpPrereqError
12779 errcls = errors.OpExecError
12781 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12785 def CheckArguments(self):
12786 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12787 self.expandnames_calls = 0
12789 def ExpandNames(self):
12790 checkargs_calls = getattr(self, "checkargs_calls", 0)
12791 if checkargs_calls < 1:
12792 raise errors.ProgrammerError("CheckArguments was not called")
12794 self.expandnames_calls += 1
12796 if self.op.notify_waitlock:
12797 self._Notify(True, constants.JQT_EXPANDNAMES, None)
12799 self.LogInfo("Expanding names")
12801 # Get lock on master node (just to get a lock, not for a particular reason)
12802 self.needed_locks = {
12803 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12806 def Exec(self, feedback_fn):
12807 if self.expandnames_calls < 1:
12808 raise errors.ProgrammerError("ExpandNames was not called")
12810 if self.op.notify_exec:
12811 self._Notify(False, constants.JQT_EXEC, None)
12813 self.LogInfo("Executing")
12815 if self.op.log_messages:
12816 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12817 for idx, msg in enumerate(self.op.log_messages):
12818 self.LogInfo("Sending log message %s", idx + 1)
12819 feedback_fn(constants.JQT_MSGPREFIX + msg)
12820 # Report how many test messages have been sent
12821 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12824 raise errors.OpExecError("Opcode failure was requested")
12829 class IAllocator(object):
12830 """IAllocator framework.
12832 An IAllocator instance has three sets of attributes:
12833 - cfg that is needed to query the cluster
12834 - input data (all members of the _KEYS class attribute are required)
12835 - four buffer attributes (in|out_data|text), that represent the
12836 input (to the external script) in text and data structure format,
12837 and the output from it, again in two formats
12838 - the result variables from the script (success, info, nodes) for
12839 easy usage
12842 # pylint: disable=R0902
12843 # lots of instance attributes
12845 def __init__(self, cfg, rpc, mode, **kwargs):
12848 # init buffer variables
12849 self.in_text = self.out_text = self.in_data = self.out_data = None
12850 # init all input fields so that pylint is happy
12852 self.memory = self.disks = self.disk_template = None
12853 self.os = self.tags = self.nics = self.vcpus = None
12854 self.hypervisor = None
12855 self.relocate_from = None
12857 self.instances = None
12858 self.evac_mode = None
12859 self.target_groups = []
12861 self.required_nodes = None
12862 # init result fields
12863 self.success = self.info = self.result = None
12866 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12868 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12869 " IAllocator" % self.mode)
12871 keyset = [n for (n, _) in keydata]
12874 if key not in keyset:
12875 raise errors.ProgrammerError("Invalid input parameter '%s' to"
12876 " IAllocator" % key)
12877 setattr(self, key, kwargs[key])
12880 if key not in kwargs:
12881 raise errors.ProgrammerError("Missing input parameter '%s' to"
12882 " IAllocator" % key)
12883 self._BuildInputData(compat.partial(fn, self), keydata)
12885 def _ComputeClusterData(self):
12886 """Compute the generic allocator input data.
12888 This is the data that is independent of the actual operation.
12892 cluster_info = cfg.GetClusterInfo()
12895 "version": constants.IALLOCATOR_VERSION,
12896 "cluster_name": cfg.GetClusterName(),
12897 "cluster_tags": list(cluster_info.GetTags()),
12898 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12899 # we don't have job IDs
12901 ninfo = cfg.GetAllNodesInfo()
12902 iinfo = cfg.GetAllInstancesInfo().values()
12903 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12906 node_list = [n.name for n in ninfo.values() if n.vm_capable]
12908 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12909 hypervisor_name = self.hypervisor
12910 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12911 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12913 hypervisor_name = cluster_info.enabled_hypervisors[0]
12915 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12918 self.rpc.call_all_instances_info(node_list,
12919 cluster_info.enabled_hypervisors)
12921 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12923 config_ndata = self._ComputeBasicNodeData(ninfo)
12924 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12925 i_list, config_ndata)
12926 assert len(data["nodes"]) == len(ninfo), \
12927 "Incomplete node data computed"
12929 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12931 self.in_data = data
12934 def _ComputeNodeGroupData(cfg):
12935 """Compute node groups data.
12938 ng = dict((guuid, {
12939 "name": gdata.name,
12940 "alloc_policy": gdata.alloc_policy,
12942 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12947 def _ComputeBasicNodeData(node_cfg):
12948 """Compute global node data.
12951 @returns: a dict of name: (node dict, node config)
12954 # fill in static (config-based) values
12955 node_results = dict((ninfo.name, {
12956 "tags": list(ninfo.GetTags()),
12957 "primary_ip": ninfo.primary_ip,
12958 "secondary_ip": ninfo.secondary_ip,
12959 "offline": ninfo.offline,
12960 "drained": ninfo.drained,
12961 "master_candidate": ninfo.master_candidate,
12962 "group": ninfo.group,
12963 "master_capable": ninfo.master_capable,
12964 "vm_capable": ninfo.vm_capable,
12966 for ninfo in node_cfg.values())
12968 return node_results
  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data, merging dynamic (RPC-collected) values
    into the static configuration data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results
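
  # For every node that is neither offline nor drained, the static entry is
  # merged with the dynamic keys "total_memory", "reserved_memory",
  # "free_memory", "total_disk", "free_disk", "total_cpus", "i_pri_memory"
  # and "i_pri_up_memory"; offline or drained nodes keep only the static data.
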
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data
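
  # Example of a single instance entry, with hypothetical values (the
  # "disk_space_total" key is filled in via _ComputeDiskSize above):
  #   "inst1.example.com": {"tags": [], "admin_up": True, "vcpus": 1,
  #                         "memory": 512, "os": "debian-image",
  #                         "nodes": ["node1", "node2"], "nics": [...],
  #                         "disks": [{"size": 1024, "mode": "rw"}],
  #                         "disk_template": "drbd", "hypervisor": "kvm"}
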
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }
    return request
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request
  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
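
  # After this call self.in_data holds the cluster description plus a
  # "request" entry of the form {"type": <mode>, <mode-specific keys>}, and
  # self.in_text holds the serialized form that is handed to the external
  # iallocator script.
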
  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                  ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                  ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other output fields.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
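
  # The script's reply is expected to be a serialized dict such as the
  # following hypothetical answer to an allocation request:
  #   {"success": True, "info": "allocation successful",
  #    "result": ["node1.example.com"]}
  # where "result" must also satisfy the mode's result validator
  # (ht.TList or _NEVAC_RESULT above).
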
  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @param groups: Group information
    @param nodes: Node names

    """
    result = set()
    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]
        result.add(group_name)

    return sorted(result)
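
  # Hypothetical example:
  #   _NodesToGroups({"node1": "uuid-a", "node2": "uuid-a"},
  #                  {"uuid-a": {"name": "default"}},
  #                  ["node1", "node2", "unknown-node"])
  # returns ["default"]: unknown nodes are skipped and groups that cannot be
  # resolved fall back to their UUID.
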

class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
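
  # With direction "in" only the generated allocator input text is returned;
  # with direction "out" the named allocator script is actually executed and
  # its raw output is returned without validation.
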

#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
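
# For example, _GetQueryImplementation(constants.QR_NODE) returns the
# _NodeQuery class, while an unknown resource name raises OpPrereqError.
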