code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43 import operator
  44
  45 from ganeti import ssh
  46 from ganeti import utils
  47 from ganeti import errors
  48 from ganeti import hypervisor
  49 from ganeti import locking
  50 from ganeti import constants
  51 from ganeti import objects
  52 from ganeti import serializer
  53 from ganeti import ssconf
  54 from ganeti import uidpool
  55 from ganeti import compat
  56 from ganeti import masterd
  57 from ganeti import netutils
  58 from ganeti import query
  59 from ganeti import qlang
  60 from ganeti import opcodes
  61 from ganeti import ht
  62
  63 import ganeti.masterd.instance # pylint: disable=W0611
  64
  65
  66 class ResultWithJobs:
  67   """Data container for LU results with jobs.
  68
  69   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  70   by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  71   contained in the C{jobs} attribute and include the job IDs in the opcode
  72   result.
  73
  74   """
  75   def __init__(self, jobs, **kwargs):
  76     """Initializes this class.
  77
  78     Additional return values can be specified as keyword arguments.
  79
  80     @type jobs: list of lists of L{opcode.OpCode}
  81     @param jobs: A list of lists of opcode objects
  82
  83     """
  84     self.jobs = jobs
  85     self.other = kwargs
  86
  87
  88 class LogicalUnit(object):
  89   """Logical Unit base class.
  90
  91   Subclasses must follow these rules:
  92     - implement ExpandNames
  93     - implement CheckPrereq (except when tasklets are used)
  94     - implement Exec (except when tasklets are used)
  95     - implement BuildHooksEnv
  96     - implement BuildHooksNodes
  97     - redefine HPATH and HTYPE
  98     - optionally redefine their run requirements:
  99         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 100
 101   Note that all commands require root permissions.
 102
 103   @ivar dry_run_result: the value (if any) that will be returned to the caller
 104       in dry-run mode (signalled by opcode dry_run parameter)
 105
 106   """
 107   HPATH = None
 108   HTYPE = None
 109   REQ_BGL = True
 110
 111   def __init__(self, processor, op, context, rpc):
 112     """Constructor for LogicalUnit.
 113
 114     This needs to be overridden in derived classes in order to check op
 115     validity.
 116
 117     """
 118     self.proc = processor
 119     self.op = op
 120     self.cfg = context.cfg
 121     self.glm = context.glm
 122     # readability alias
 123     self.owned_locks = context.glm.list_owned
 124     self.context = context
 125     self.rpc = rpc
 126     # Dicts used to declare locking needs to mcpu
 127     self.needed_locks = None
 128     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 129     self.add_locks = {}
 130     self.remove_locks = {}
 131     # Used to force good behavior when calling helper functions
 132     self.recalculate_locks = {}
 133     # logging
 134     self.Log = processor.Log # pylint: disable=C0103
 135     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 136     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 137     self.LogStep = processor.LogStep # pylint: disable=C0103
 138     # support for dry-run
 139     self.dry_run_result = None
 140     # support for generic debug attribute
 141     if (not hasattr(self.op, "debug_level") or
 142         not isinstance(self.op.debug_level, int)):
 143       self.op.debug_level = 0
 144
 145     # Tasklets
 146     self.tasklets = None
 147
 148     # Validate opcode parameters and set defaults
 149     self.op.Validate(True)
 150
 151     self.CheckArguments()
 152
 153   def CheckArguments(self):
 154     """Check syntactic validity for the opcode arguments.
 155
 156     This method is for doing a simple syntactic check and ensure
 157     validity of opcode parameters, without any cluster-related
 158     checks. While the same can be accomplished in ExpandNames and/or
 159     CheckPrereq, doing these separate is better because:
 160
 161       - ExpandNames is left as as purely a lock-related function
 162       - CheckPrereq is run after we have acquired locks (and possible
 163         waited for them)
 164
 165     The function is allowed to change the self.op attribute so that
 166     later methods can no longer worry about missing parameters.
 167
 168     """
 169     pass
 170
 171   def ExpandNames(self):
 172     """Expand names for this LU.
 173
 174     This method is called before starting to execute the opcode, and it should
 175     update all the parameters of the opcode to their canonical form (e.g. a
 176     short node name must be fully expanded after this method has successfully
 177     completed). This way locking, hooks, logging, etc. can work correctly.
 178
 179     LUs which implement this method must also populate the self.needed_locks
 180     member, as a dict with lock levels as keys, and a list of needed lock names
 181     as values. Rules:
 182
 183       - use an empty dict if you don't need any lock
 184       - if you don't need any lock at a particular level omit that level
 185       - don't put anything for the BGL level
 186       - if you want all locks at a level use locking.ALL_SET as a value
 187
 188     If you need to share locks (rather than acquire them exclusively) at one
 189     level you can modify self.share_locks, setting a true value (usually 1) for
 190     that level. By default locks are not shared.
 191
 192     This function can also define a list of tasklets, which then will be
 193     executed in order instead of the usual LU-level CheckPrereq and Exec
 194     functions, if those are not defined by the LU.
 195
 196     Examples::
 197
 198       # Acquire all nodes and one instance
 199       self.needed_locks = {
 200         locking.LEVEL_NODE: locking.ALL_SET,
 201         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 202       }
 203       # Acquire just two nodes
 204       self.needed_locks = {
 205         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 206       }
 207       # Acquire no locks
 208       self.needed_locks = {} # No, you can't leave it to the default value None
 209
 210     """
 211     # The implementation of this method is mandatory only if the new LU is
 212     # concurrent, so that old LUs don't need to be changed all at the same
 213     # time.
 214     if self.REQ_BGL:
 215       self.needed_locks = {} # Exclusive LUs don't need locks.
 216     else:
 217       raise NotImplementedError
 218
 219   def DeclareLocks(self, level):
 220     """Declare LU locking needs for a level
 221
 222     While most LUs can just declare their locking needs at ExpandNames time,
 223     sometimes there's the need to calculate some locks after having acquired
 224     the ones before. This function is called just before acquiring locks at a
 225     particular level, but after acquiring the ones at lower levels, and permits
 226     such calculations. It can be used to modify self.needed_locks, and by
 227     default it does nothing.
 228
 229     This function is only called if you have something already set in
 230     self.needed_locks for the level.
 231
 232     @param level: Locking level which is going to be locked
 233     @type level: member of ganeti.locking.LEVELS
 234
 235     """
 236
 237   def CheckPrereq(self):
 238     """Check prerequisites for this LU.
 239
 240     This method should check that the prerequisites for the execution
 241     of this LU are fulfilled. It can do internode communication, but
 242     it should be idempotent - no cluster or system changes are
 243     allowed.
 244
 245     The method should raise errors.OpPrereqError in case something is
 246     not fulfilled. Its return value is ignored.
 247
 248     This method should also update all the parameters of the opcode to
 249     their canonical form if it hasn't been done by ExpandNames before.
 250
 251     """
 252     if self.tasklets is not None:
 253       for (idx, tl) in enumerate(self.tasklets):
 254         logging.debug("Checking prerequisites for tasklet %s/%s",
 255                       idx + 1, len(self.tasklets))
 256         tl.CheckPrereq()
 257     else:
 258       pass
 259
 260   def Exec(self, feedback_fn):
 261     """Execute the LU.
 262
 263     This method should implement the actual work. It should raise
 264     errors.OpExecError for failures that are somewhat dealt with in
 265     code, or expected.
 266
 267     """
 268     if self.tasklets is not None:
 269       for (idx, tl) in enumerate(self.tasklets):
 270         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 271         tl.Exec(feedback_fn)
 272     else:
 273       raise NotImplementedError
 274
 275   def BuildHooksEnv(self):
 276     """Build hooks environment for this LU.
 277
 278     @rtype: dict
 279     @return: Dictionary containing the environment that will be used for
 280       running the hooks for this LU. The keys of the dict must not be prefixed
 281       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 282       will extend the environment with additional variables. If no environment
 283       should be defined, an empty dictionary should be returned (not C{None}).
 284     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 285       will not be called.
 286
 287     """
 288     raise NotImplementedError
 289
 290   def BuildHooksNodes(self):
 291     """Build list of nodes to run LU's hooks.
 292
 293     @rtype: tuple; (list, list)
 294     @return: Tuple containing a list of node names on which the hook
 295       should run before the execution and a list of node names on which the
 296       hook should run after the execution. No nodes should be returned as an
 297       empty list (and not None).
 298     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 299       will not be called.
 300
 301     """
 302     raise NotImplementedError
 303
 304   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 305     """Notify the LU about the results of its hooks.
 306
 307     This method is called every time a hooks phase is executed, and notifies
 308     the Logical Unit about the hooks' result. The LU can then use it to alter
 309     its result based on the hooks.  By default the method does nothing and the
 310     previous result is passed back unchanged but any LU can define it if it
 311     wants to use the local cluster hook-scripts somehow.
 312
 313     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 314         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 315     @param hook_results: the results of the multi-node hooks rpc call
 316     @param feedback_fn: function used send feedback back to the caller
 317     @param lu_result: the previous Exec result this LU had, or None
 318         in the PRE phase
 319     @return: the new Exec result, based on the previous result
 320         and hook results
 321
 322     """
 323     # API must be kept, thus we ignore the unused argument and could
 324     # be a function warnings
 325     # pylint: disable=W0613,R0201
 326     return lu_result
 327
 328   def _ExpandAndLockInstance(self):
 329     """Helper function to expand and lock an instance.
 330
 331     Many LUs that work on an instance take its name in self.op.instance_name
 332     and need to expand it and then declare the expanded name for locking. This
 333     function does it, and then updates self.op.instance_name to the expanded
 334     name. It also initializes needed_locks as a dict, if this hasn't been done
 335     before.
 336
 337     """
 338     if self.needed_locks is None:
 339       self.needed_locks = {}
 340     else:
 341       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 342         "_ExpandAndLockInstance called with instance-level locks set"
 343     self.op.instance_name = _ExpandInstanceName(self.cfg,
 344                                                 self.op.instance_name)
 345     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 346
 347   def _LockInstancesNodes(self, primary_only=False):
 348     """Helper function to declare instances' nodes for locking.
 349
 350     This function should be called after locking one or more instances to lock
 351     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 352     with all primary or secondary nodes for instances already locked and
 353     present in self.needed_locks[locking.LEVEL_INSTANCE].
 354
 355     It should be called from DeclareLocks, and for safety only works if
 356     self.recalculate_locks[locking.LEVEL_NODE] is set.
 357
 358     In the future it may grow parameters to just lock some instance's nodes, or
 359     to just lock primaries or secondary nodes, if needed.
 360
 361     If should be called in DeclareLocks in a way similar to::
 362
 363       if level == locking.LEVEL_NODE:
 364         self._LockInstancesNodes()
 365
 366     @type primary_only: boolean
 367     @param primary_only: only lock primary nodes of locked instances
 368
 369     """
 370     assert locking.LEVEL_NODE in self.recalculate_locks, \
 371       "_LockInstancesNodes helper function called with no nodes to recalculate"
 372
 373     # TODO: check if we're really been called with the instance locks held
 374
 375     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 376     # future we might want to have different behaviors depending on the value
 377     # of self.recalculate_locks[locking.LEVEL_NODE]
 378     wanted_nodes = []
 379     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 380     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 381       wanted_nodes.append(instance.primary_node)
 382       if not primary_only:
 383         wanted_nodes.extend(instance.secondary_nodes)
 384
 385     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 386       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 387     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 388       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 389
 390     del self.recalculate_locks[locking.LEVEL_NODE]
 391
 392
 393 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 394   """Simple LU which runs no hooks.
 395
 396   This LU is intended as a parent for other LogicalUnits which will
 397   run no hooks, in order to reduce duplicate code.
 398
 399   """
 400   HPATH = None
 401   HTYPE = None
 402
 403   def BuildHooksEnv(self):
 404     """Empty BuildHooksEnv for NoHooksLu.
 405
 406     This just raises an error.
 407
 408     """
 409     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 410
 411   def BuildHooksNodes(self):
 412     """Empty BuildHooksNodes for NoHooksLU.
 413
 414     """
 415     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 416
 417
 418 class Tasklet:
 419   """Tasklet base class.
 420
 421   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 422   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 423   tasklets know nothing about locks.
 424
 425   Subclasses must follow these rules:
 426     - Implement CheckPrereq
 427     - Implement Exec
 428
 429   """
 430   def __init__(self, lu):
 431     self.lu = lu
 432
 433     # Shortcuts
 434     self.cfg = lu.cfg
 435     self.rpc = lu.rpc
 436
 437   def CheckPrereq(self):
 438     """Check prerequisites for this tasklets.
 439
 440     This method should check whether the prerequisites for the execution of
 441     this tasklet are fulfilled. It can do internode communication, but it
 442     should be idempotent - no cluster or system changes are allowed.
 443
 444     The method should raise errors.OpPrereqError in case something is not
 445     fulfilled. Its return value is ignored.
 446
 447     This method should also update all parameters to their canonical form if it
 448     hasn't been done before.
 449
 450     """
 451     pass
 452
 453   def Exec(self, feedback_fn):
 454     """Execute the tasklet.
 455
 456     This method should implement the actual work. It should raise
 457     errors.OpExecError for failures that are somewhat dealt with in code, or
 458     expected.
 459
 460     """
 461     raise NotImplementedError
 462
 463
 464 class _QueryBase:
 465   """Base for query utility classes.
 466
 467   """
 468   #: Attribute holding field definitions
 469   FIELDS = None
 470
 471   def __init__(self, filter_, fields, use_locking):
 472     """Initializes this class.
 473
 474     """
 475     self.use_locking = use_locking
 476
 477     self.query = query.Query(self.FIELDS, fields, filter_=filter_,
 478                              namefield="name")
 479     self.requested_data = self.query.RequestedData()
 480     self.names = self.query.RequestedNames()
 481
 482     # Sort only if no names were requested
 483     self.sort_by_name = not self.names
 484
 485     self.do_locking = None
 486     self.wanted = None
 487
 488   def _GetNames(self, lu, all_names, lock_level):
 489     """Helper function to determine names asked for in the query.
 490
 491     """
 492     if self.do_locking:
 493       names = lu.owned_locks(lock_level)
 494     else:
 495       names = all_names
 496
 497     if self.wanted == locking.ALL_SET:
 498       assert not self.names
 499       # caller didn't specify names, so ordering is not important
 500       return utils.NiceSort(names)
 501
 502     # caller specified names and we must keep the same order
 503     assert self.names
 504     assert not self.do_locking or lu.glm.is_owned(lock_level)
 505
 506     missing = set(self.wanted).difference(names)
 507     if missing:
 508       raise errors.OpExecError("Some items were removed before retrieving"
 509                                " their data: %s" % missing)
 510
 511     # Return expanded names
 512     return self.wanted
 513
 514   def ExpandNames(self, lu):
 515     """Expand names for this query.
 516
 517     See L{LogicalUnit.ExpandNames}.
 518
 519     """
 520     raise NotImplementedError()
 521
 522   def DeclareLocks(self, lu, level):
 523     """Declare locks for this query.
 524
 525     See L{LogicalUnit.DeclareLocks}.
 526
 527     """
 528     raise NotImplementedError()
 529
 530   def _GetQueryData(self, lu):
 531     """Collects all data for this query.
 532
 533     @return: Query data object
 534
 535     """
 536     raise NotImplementedError()
 537
 538   def NewStyleQuery(self, lu):
 539     """Collect data and execute query.
 540
 541     """
 542     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 543                                   sort_by_name=self.sort_by_name)
 544
 545   def OldStyleQuery(self, lu):
 546     """Collect data and execute query.
 547
 548     """
 549     return self.query.OldStyleQuery(self._GetQueryData(lu),
 550                                     sort_by_name=self.sort_by_name)
 551
 552
 553 def _ShareAll():
 554   """Returns a dict declaring all lock levels shared.
 555
 556   """
 557   return dict.fromkeys(locking.LEVELS, 1)
 558
 559
 560 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
 561   """Checks if the owned node groups are still correct for an instance.
 562
 563   @type cfg: L{config.ConfigWriter}
 564   @param cfg: The cluster configuration
 565   @type instance_name: string
 566   @param instance_name: Instance name
 567   @type owned_groups: set or frozenset
 568   @param owned_groups: List of currently owned node groups
 569
 570   """
 571   inst_groups = cfg.GetInstanceNodeGroups(instance_name)
 572
 573   if not owned_groups.issuperset(inst_groups):
 574     raise errors.OpPrereqError("Instance %s's node groups changed since"
 575                                " locks were acquired, current groups are"
 576                                " are '%s', owning groups '%s'; retry the"
 577                                " operation" %
 578                                (instance_name,
 579                                 utils.CommaJoin(inst_groups),
 580                                 utils.CommaJoin(owned_groups)),
 581                                errors.ECODE_STATE)
 582
 583   return inst_groups
 584
 585
 586 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 587   """Checks if the instances in a node group are still correct.
 588
 589   @type cfg: L{config.ConfigWriter}
 590   @param cfg: The cluster configuration
 591   @type group_uuid: string
 592   @param group_uuid: Node group UUID
 593   @type owned_instances: set or frozenset
 594   @param owned_instances: List of currently owned instances
 595
 596   """
 597   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 598   if owned_instances != wanted_instances:
 599     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 600                                " locks were acquired, wanted '%s', have '%s';"
 601                                " retry the operation" %
 602                                (group_uuid,
 603                                 utils.CommaJoin(wanted_instances),
 604                                 utils.CommaJoin(owned_instances)),
 605                                errors.ECODE_STATE)
 606
 607   return wanted_instances
 608
 609
 610 def _SupportsOob(cfg, node):
 611   """Tells if node supports OOB.
 612
 613   @type cfg: L{config.ConfigWriter}
 614   @param cfg: The cluster configuration
 615   @type node: L{objects.Node}
 616   @param node: The node
 617   @return: The OOB script if supported or an empty string otherwise
 618
 619   """
 620   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 621
 622
 623 def _GetWantedNodes(lu, nodes):
 624   """Returns list of checked and expanded node names.
 625
 626   @type lu: L{LogicalUnit}
 627   @param lu: the logical unit on whose behalf we execute
 628   @type nodes: list
 629   @param nodes: list of node names or None for all nodes
 630   @rtype: list
 631   @return: the list of nodes, sorted
 632   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 633
 634   """
 635   if nodes:
 636     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 637
 638   return utils.NiceSort(lu.cfg.GetNodeList())
 639
 640
 641 def _GetWantedInstances(lu, instances):
 642   """Returns list of checked and expanded instance names.
 643
 644   @type lu: L{LogicalUnit}
 645   @param lu: the logical unit on whose behalf we execute
 646   @type instances: list
 647   @param instances: list of instance names or None for all instances
 648   @rtype: list
 649   @return: the list of instances, sorted
 650   @raise errors.OpPrereqError: if the instances parameter is wrong type
 651   @raise errors.OpPrereqError: if any of the passed instances is not found
 652
 653   """
 654   if instances:
 655     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 656   else:
 657     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 658   return wanted
 659
 660
 661 def _GetUpdatedParams(old_params, update_dict,
 662                       use_default=True, use_none=False):
 663   """Return the new version of a parameter dictionary.
 664
 665   @type old_params: dict
 666   @param old_params: old parameters
 667   @type update_dict: dict
 668   @param update_dict: dict containing new parameter values, or
 669       constants.VALUE_DEFAULT to reset the parameter to its default
 670       value
 671   @param use_default: boolean
 672   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 673       values as 'to be deleted' values
 674   @param use_none: boolean
 675   @type use_none: whether to recognise C{None} values as 'to be
 676       deleted' values
 677   @rtype: dict
 678   @return: the new parameter dictionary
 679
 680   """
 681   params_copy = copy.deepcopy(old_params)
 682   for key, val in update_dict.iteritems():
 683     if ((use_default and val == constants.VALUE_DEFAULT) or
 684         (use_none and val is None)):
 685       try:
 686         del params_copy[key]
 687       except KeyError:
 688         pass
 689     else:
 690       params_copy[key] = val
 691   return params_copy
 692
 693
 694 def _ReleaseLocks(lu, level, names=None, keep=None):
 695   """Releases locks owned by an LU.
 696
 697   @type lu: L{LogicalUnit}
 698   @param level: Lock level
 699   @type names: list or None
 700   @param names: Names of locks to release
 701   @type keep: list or None
 702   @param keep: Names of locks to retain
 703
 704   """
 705   assert not (keep is not None and names is not None), \
 706          "Only one of the 'names' and the 'keep' parameters can be given"
 707
 708   if names is not None:
 709     should_release = names.__contains__
 710   elif keep:
 711     should_release = lambda name: name not in keep
 712   else:
 713     should_release = None
 714
 715   if should_release:
 716     retain = []
 717     release = []
 718
 719     # Determine which locks to release
 720     for name in lu.owned_locks(level):
 721       if should_release(name):
 722         release.append(name)
 723       else:
 724         retain.append(name)
 725
 726     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 727
 728     # Release just some locks
 729     lu.glm.release(level, names=release)
 730
 731     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 732   else:
 733     # Release everything
 734     lu.glm.release(level)
 735
 736     assert not lu.glm.is_owned(level), "No locks should be owned"
 737
 738
 739 def _MapInstanceDisksToNodes(instances):
 740   """Creates a map from (node, volume) to instance name.
 741
 742   @type instances: list of L{objects.Instance}
 743   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 744
 745   """
 746   return dict(((node, vol), inst.name)
 747               for inst in instances
 748               for (node, vols) in inst.MapLVsByNode().items()
 749               for vol in vols)
 750
 751
 752 def _RunPostHook(lu, node_name):
 753   """Runs the post-hook for an opcode on a single node.
 754
 755   """
 756   hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
 757   try:
 758     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 759   except:
 760     # pylint: disable=W0702
 761     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 762
 763
 764 def _CheckOutputFields(static, dynamic, selected):
 765   """Checks whether all selected fields are valid.
 766
 767   @type static: L{utils.FieldSet}
 768   @param static: static fields set
 769   @type dynamic: L{utils.FieldSet}
 770   @param dynamic: dynamic fields set
 771
 772   """
 773   f = utils.FieldSet()
 774   f.Extend(static)
 775   f.Extend(dynamic)
 776
 777   delta = f.NonMatching(selected)
 778   if delta:
 779     raise errors.OpPrereqError("Unknown output fields selected: %s"
 780                                % ",".join(delta), errors.ECODE_INVAL)
 781
 782
 783 def _CheckGlobalHvParams(params):
 784   """Validates that given hypervisor params are not global ones.
 785
 786   This will ensure that instances don't get customised versions of
 787   global params.
 788
 789   """
 790   used_globals = constants.HVC_GLOBALS.intersection(params)
 791   if used_globals:
 792     msg = ("The following hypervisor parameters are global and cannot"
 793            " be customized at instance level, please modify them at"
 794            " cluster level: %s" % utils.CommaJoin(used_globals))
 795     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 796
 797
 798 def _CheckNodeOnline(lu, node, msg=None):
 799   """Ensure that a given node is online.
 800
 801   @param lu: the LU on behalf of which we make the check
 802   @param node: the node to check
 803   @param msg: if passed, should be a message to replace the default one
 804   @raise errors.OpPrereqError: if the node is offline
 805
 806   """
 807   if msg is None:
 808     msg = "Can't use offline node"
 809   if lu.cfg.GetNodeInfo(node).offline:
 810     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 811
 812
 813 def _CheckNodeNotDrained(lu, node):
 814   """Ensure that a given node is not drained.
 815
 816   @param lu: the LU on behalf of which we make the check
 817   @param node: the node to check
 818   @raise errors.OpPrereqError: if the node is drained
 819
 820   """
 821   if lu.cfg.GetNodeInfo(node).drained:
 822     raise errors.OpPrereqError("Can't use drained node %s" % node,
 823                                errors.ECODE_STATE)
 824
 825
 826 def _CheckNodeVmCapable(lu, node):
 827   """Ensure that a given node is vm capable.
 828
 829   @param lu: the LU on behalf of which we make the check
 830   @param node: the node to check
 831   @raise errors.OpPrereqError: if the node is not vm capable
 832
 833   """
 834   if not lu.cfg.GetNodeInfo(node).vm_capable:
 835     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 836                                errors.ECODE_STATE)
 837
 838
 839 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 840   """Ensure that a node supports a given OS.
 841
 842   @param lu: the LU on behalf of which we make the check
 843   @param node: the node to check
 844   @param os_name: the OS to query about
 845   @param force_variant: whether to ignore variant errors
 846   @raise errors.OpPrereqError: if the node is not supporting the OS
 847
 848   """
 849   result = lu.rpc.call_os_get(node, os_name)
 850   result.Raise("OS '%s' not in supported OS list for node %s" %
 851                (os_name, node),
 852                prereq=True, ecode=errors.ECODE_INVAL)
 853   if not force_variant:
 854     _CheckOSVariant(result.payload, os_name)
 855
 856
 857 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 858   """Ensure that a node has the given secondary ip.
 859
 860   @type lu: L{LogicalUnit}
 861   @param lu: the LU on behalf of which we make the check
 862   @type node: string
 863   @param node: the node to check
 864   @type secondary_ip: string
 865   @param secondary_ip: the ip to check
 866   @type prereq: boolean
 867   @param prereq: whether to throw a prerequisite or an execute error
 868   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 869   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 870
 871   """
 872   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 873   result.Raise("Failure checking secondary ip on node %s" % node,
 874                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 875   if not result.payload:
 876     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 877            " please fix and re-run this command" % secondary_ip)
 878     if prereq:
 879       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 880     else:
 881       raise errors.OpExecError(msg)
 882
 883
 884 def _GetClusterDomainSecret():
 885   """Reads the cluster domain secret.
 886
 887   """
 888   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 889                                strict=True)
 890
 891
 892 def _CheckInstanceDown(lu, instance, reason):
 893   """Ensure that an instance is not running."""
 894   if instance.admin_up:
 895     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 896                                (instance.name, reason), errors.ECODE_STATE)
 897
 898   pnode = instance.primary_node
 899   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 900   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 901               prereq=True, ecode=errors.ECODE_ENVIRON)
 902
 903   if instance.name in ins_l.payload:
 904     raise errors.OpPrereqError("Instance %s is running, %s" %
 905                                (instance.name, reason), errors.ECODE_STATE)
 906
 907
 908 def _ExpandItemName(fn, name, kind):
 909   """Expand an item name.
 910
 911   @param fn: the function to use for expansion
 912   @param name: requested item name
 913   @param kind: text description ('Node' or 'Instance')
 914   @return: the resolved (full) name
 915   @raise errors.OpPrereqError: if the item is not found
 916
 917   """
 918   full_name = fn(name)
 919   if full_name is None:
 920     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 921                                errors.ECODE_NOENT)
 922   return full_name
 923
 924
 925 def _ExpandNodeName(cfg, name):
 926   """Wrapper over L{_ExpandItemName} for nodes."""
 927   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 928
 929
 930 def _ExpandInstanceName(cfg, name):
 931   """Wrapper over L{_ExpandItemName} for instance."""
 932   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 933
 934
 935 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 936                           memory, vcpus, nics, disk_template, disks,
 937                           bep, hvp, hypervisor_name, tags):
 938   """Builds instance related env variables for hooks
 939
 940   This builds the hook environment from individual variables.
 941
 942   @type name: string
 943   @param name: the name of the instance
 944   @type primary_node: string
 945   @param primary_node: the name of the instance's primary node
 946   @type secondary_nodes: list
 947   @param secondary_nodes: list of secondary nodes as strings
 948   @type os_type: string
 949   @param os_type: the name of the instance's OS
 950   @type status: boolean
 951   @param status: the should_run status of the instance
 952   @type memory: string
 953   @param memory: the memory size of the instance
 954   @type vcpus: string
 955   @param vcpus: the count of VCPUs the instance has
 956   @type nics: list
 957   @param nics: list of tuples (ip, mac, mode, link) representing
 958       the NICs the instance has
 959   @type disk_template: string
 960   @param disk_template: the disk template of the instance
 961   @type disks: list
 962   @param disks: the list of (size, mode) pairs
 963   @type bep: dict
 964   @param bep: the backend parameters for the instance
 965   @type hvp: dict
 966   @param hvp: the hypervisor parameters for the instance
 967   @type hypervisor_name: string
 968   @param hypervisor_name: the hypervisor for the instance
 969   @type tags: list
 970   @param tags: list of instance tags as strings
 971   @rtype: dict
 972   @return: the hook environment for this instance
 973
 974   """
 975   if status:
 976     str_status = "up"
 977   else:
 978     str_status = "down"
 979   env = {
 980     "OP_TARGET": name,
 981     "INSTANCE_NAME": name,
 982     "INSTANCE_PRIMARY": primary_node,
 983     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 984     "INSTANCE_OS_TYPE": os_type,
 985     "INSTANCE_STATUS": str_status,
 986     "INSTANCE_MEMORY": memory,
 987     "INSTANCE_VCPUS": vcpus,
 988     "INSTANCE_DISK_TEMPLATE": disk_template,
 989     "INSTANCE_HYPERVISOR": hypervisor_name,
 990   }
 991
 992   if nics:
 993     nic_count = len(nics)
 994     for idx, (ip, mac, mode, link) in enumerate(nics):
 995       if ip is None:
 996         ip = ""
 997       env["INSTANCE_NIC%d_IP" % idx] = ip
 998       env["INSTANCE_NIC%d_MAC" % idx] = mac
 999       env["INSTANCE_NIC%d_MODE" % idx] = mode
1000       env["INSTANCE_NIC%d_LINK" % idx] = link
1001       if mode == constants.NIC_MODE_BRIDGED:
1002         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1003   else:
1004     nic_count = 0
1005
1006   env["INSTANCE_NIC_COUNT"] = nic_count
1007
1008   if disks:
1009     disk_count = len(disks)
1010     for idx, (size, mode) in enumerate(disks):
1011       env["INSTANCE_DISK%d_SIZE" % idx] = size
1012       env["INSTANCE_DISK%d_MODE" % idx] = mode
1013   else:
1014     disk_count = 0
1015
1016   env["INSTANCE_DISK_COUNT"] = disk_count
1017
1018   if not tags:
1019     tags = []
1020
1021   env["INSTANCE_TAGS"] = " ".join(tags)
1022
1023   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1024     for key, value in source.items():
1025       env["INSTANCE_%s_%s" % (kind, key)] = value
1026
1027   return env
1028
1029
1030 def _NICListToTuple(lu, nics):
1031   """Build a list of nic information tuples.
1032
1033   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1034   value in LUInstanceQueryData.
1035
1036   @type lu:  L{LogicalUnit}
1037   @param lu: the logical unit on whose behalf we execute
1038   @type nics: list of L{objects.NIC}
1039   @param nics: list of nics to convert to hooks tuples
1040
1041   """
1042   hooks_nics = []
1043   cluster = lu.cfg.GetClusterInfo()
1044   for nic in nics:
1045     ip = nic.ip
1046     mac = nic.mac
1047     filled_params = cluster.SimpleFillNIC(nic.nicparams)
1048     mode = filled_params[constants.NIC_MODE]
1049     link = filled_params[constants.NIC_LINK]
1050     hooks_nics.append((ip, mac, mode, link))
1051   return hooks_nics
1052
1053
1054 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1055   """Builds instance related env variables for hooks from an object.
1056
1057   @type lu: L{LogicalUnit}
1058   @param lu: the logical unit on whose behalf we execute
1059   @type instance: L{objects.Instance}
1060   @param instance: the instance for which we should build the
1061       environment
1062   @type override: dict
1063   @param override: dictionary with key/values that will override
1064       our values
1065   @rtype: dict
1066   @return: the hook environment dictionary
1067
1068   """
1069   cluster = lu.cfg.GetClusterInfo()
1070   bep = cluster.FillBE(instance)
1071   hvp = cluster.FillHV(instance)
1072   args = {
1073     "name": instance.name,
1074     "primary_node": instance.primary_node,
1075     "secondary_nodes": instance.secondary_nodes,
1076     "os_type": instance.os,
1077     "status": instance.admin_up,
1078     "memory": bep[constants.BE_MEMORY],
1079     "vcpus": bep[constants.BE_VCPUS],
1080     "nics": _NICListToTuple(lu, instance.nics),
1081     "disk_template": instance.disk_template,
1082     "disks": [(disk.size, disk.mode) for disk in instance.disks],
1083     "bep": bep,
1084     "hvp": hvp,
1085     "hypervisor_name": instance.hypervisor,
1086     "tags": instance.tags,
1087   }
1088   if override:
1089     args.update(override)
1090   return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1091
1092
1093 def _AdjustCandidatePool(lu, exceptions):
1094   """Adjust the candidate pool after node operations.
1095
1096   """
1097   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1098   if mod_list:
1099     lu.LogInfo("Promoted nodes to master candidate role: %s",
1100                utils.CommaJoin(node.name for node in mod_list))
1101     for name in mod_list:
1102       lu.context.ReaddNode(name)
1103   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1104   if mc_now > mc_max:
1105     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1106                (mc_now, mc_max))
1107
1108
1109 def _DecideSelfPromotion(lu, exceptions=None):
1110   """Decide whether I should promote myself as a master candidate.
1111
1112   """
1113   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1114   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1115   # the new node will increase mc_max with one, so:
1116   mc_should = min(mc_should + 1, cp_size)
1117   return mc_now < mc_should
1118
1119
1120 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1121   """Check that the brigdes needed by a list of nics exist.
1122
1123   """
1124   cluster = lu.cfg.GetClusterInfo()
1125   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1126   brlist = [params[constants.NIC_LINK] for params in paramslist
1127             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1128   if brlist:
1129     result = lu.rpc.call_bridges_exist(target_node, brlist)
1130     result.Raise("Error checking bridges on destination node '%s'" %
1131                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1132
1133
1134 def _CheckInstanceBridgesExist(lu, instance, node=None):
1135   """Check that the brigdes needed by an instance exist.
1136
1137   """
1138   if node is None:
1139     node = instance.primary_node
1140   _CheckNicsBridgesExist(lu, instance.nics, node)
1141
1142
1143 def _CheckOSVariant(os_obj, name):
1144   """Check whether an OS name conforms to the os variants specification.
1145
1146   @type os_obj: L{objects.OS}
1147   @param os_obj: OS object to check
1148   @type name: string
1149   @param name: OS name passed by the user, to check for validity
1150
1151   """
1152   variant = objects.OS.GetVariant(name)
1153   if not os_obj.supported_variants:
1154     if variant:
1155       raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1156                                  " passed)" % (os_obj.name, variant),
1157                                  errors.ECODE_INVAL)
1158     return
1159   if not variant:
1160     raise errors.OpPrereqError("OS name must include a variant",
1161                                errors.ECODE_INVAL)
1162
1163   if variant not in os_obj.supported_variants:
1164     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1165
1166
1167 def _GetNodeInstancesInner(cfg, fn):
1168   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1169
1170
1171 def _GetNodeInstances(cfg, node_name):
1172   """Returns a list of all primary and secondary instances on a node.
1173
1174   """
1175
1176   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1177
1178
1179 def _GetNodePrimaryInstances(cfg, node_name):
1180   """Returns primary instances on a node.
1181
1182   """
1183   return _GetNodeInstancesInner(cfg,
1184                                 lambda inst: node_name == inst.primary_node)
1185
1186
1187 def _GetNodeSecondaryInstances(cfg, node_name):
1188   """Returns secondary instances on a node.
1189
1190   """
1191   return _GetNodeInstancesInner(cfg,
1192                                 lambda inst: node_name in inst.secondary_nodes)
1193
1194
1195 def _GetStorageTypeArgs(cfg, storage_type):
1196   """Returns the arguments for a storage type.
1197
1198   """
1199   # Special case for file storage
1200   if storage_type == constants.ST_FILE:
1201     # storage.FileStorage wants a list of storage directories
1202     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1203
1204   return []
1205
1206
1207 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1208   faulty = []
1209
1210   for dev in instance.disks:
1211     cfg.SetDiskID(dev, node_name)
1212
1213   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1214   result.Raise("Failed to get disk status from node %s" % node_name,
1215                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1216
1217   for idx, bdev_status in enumerate(result.payload):
1218     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1219       faulty.append(idx)
1220
1221   return faulty
1222
1223
1224 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1225   """Check the sanity of iallocator and node arguments and use the
1226   cluster-wide iallocator if appropriate.
1227
1228   Check that at most one of (iallocator, node) is specified. If none is
1229   specified, then the LU's opcode's iallocator slot is filled with the
1230   cluster-wide default iallocator.
1231
1232   @type iallocator_slot: string
1233   @param iallocator_slot: the name of the opcode iallocator slot
1234   @type node_slot: string
1235   @param node_slot: the name of the opcode target node slot
1236
1237   """
1238   node = getattr(lu.op, node_slot, None)
1239   iallocator = getattr(lu.op, iallocator_slot, None)
1240
1241   if node is not None and iallocator is not None:
1242     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1243                                errors.ECODE_INVAL)
1244   elif node is None and iallocator is None:
1245     default_iallocator = lu.cfg.GetDefaultIAllocator()
1246     if default_iallocator:
1247       setattr(lu.op, iallocator_slot, default_iallocator)
1248     else:
1249       raise errors.OpPrereqError("No iallocator or node given and no"
1250                                  " cluster-wide default iallocator found;"
1251                                  " please specify either an iallocator or a"
1252                                  " node, or set a cluster-wide default"
1253                                  " iallocator")
1254
1255
1256 def _GetDefaultIAllocator(cfg, iallocator):
1257   """Decides on which iallocator to use.
1258
1259   @type cfg: L{config.ConfigWriter}
1260   @param cfg: Cluster configuration object
1261   @type iallocator: string or None
1262   @param iallocator: Iallocator specified in opcode
1263   @rtype: string
1264   @return: Iallocator name
1265
1266   """
1267   if not iallocator:
1268     # Use default iallocator
1269     iallocator = cfg.GetDefaultIAllocator()
1270
1271   if not iallocator:
1272     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1273                                " opcode nor as a cluster-wide default",
1274                                errors.ECODE_INVAL)
1275
1276   return iallocator
1277
1278
1279 class LUClusterPostInit(LogicalUnit):
1280   """Logical unit for running hooks after cluster initialization.
1281
1282   """
1283   HPATH = "cluster-init"
1284   HTYPE = constants.HTYPE_CLUSTER
1285
1286   def BuildHooksEnv(self):
1287     """Build hooks env.
1288
1289     """
1290     return {
1291       "OP_TARGET": self.cfg.GetClusterName(),
1292       }
1293
1294   def BuildHooksNodes(self):
1295     """Build hooks nodes.
1296
1297     """
1298     return ([], [self.cfg.GetMasterNode()])
1299
1300   def Exec(self, feedback_fn):
1301     """Nothing to do.
1302
1303     """
1304     return True
1305
1306
1307 class LUClusterDestroy(LogicalUnit):
1308   """Logical unit for destroying the cluster.
1309
1310   """
1311   HPATH = "cluster-destroy"
1312   HTYPE = constants.HTYPE_CLUSTER
1313
1314   def BuildHooksEnv(self):
1315     """Build hooks env.
1316
1317     """
1318     return {
1319       "OP_TARGET": self.cfg.GetClusterName(),
1320       }
1321
1322   def BuildHooksNodes(self):
1323     """Build hooks nodes.
1324
1325     """
1326     return ([], [])
1327
1328   def CheckPrereq(self):
1329     """Check prerequisites.
1330
1331     This checks whether the cluster is empty.
1332
1333     Any errors are signaled by raising errors.OpPrereqError.
1334
1335     """
1336     master = self.cfg.GetMasterNode()
1337
1338     nodelist = self.cfg.GetNodeList()
1339     if len(nodelist) != 1 or nodelist[0] != master:
1340       raise errors.OpPrereqError("There are still %d node(s) in"
1341                                  " this cluster." % (len(nodelist) - 1),
1342                                  errors.ECODE_INVAL)
1343     instancelist = self.cfg.GetInstanceList()
1344     if instancelist:
1345       raise errors.OpPrereqError("There are still %d instance(s) in"
1346                                  " this cluster." % len(instancelist),
1347                                  errors.ECODE_INVAL)
1348
1349   def Exec(self, feedback_fn):
1350     """Destroys the cluster.
1351
1352     """
1353     master = self.cfg.GetMasterNode()
1354
1355     # Run post hooks on master node before it's removed
1356     _RunPostHook(self, master)
1357
1358     result = self.rpc.call_node_stop_master(master, False)
1359     result.Raise("Could not disable the master role")
1360
1361     return master
1362
1363
1364 def _VerifyCertificate(filename):
1365   """Verifies a certificate for L{LUClusterVerifyConfig}.
1366
1367   @type filename: string
1368   @param filename: Path to PEM file
1369
1370   """
1371   try:
1372     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1373                                            utils.ReadFile(filename))
1374   except Exception, err: # pylint: disable=W0703
1375     return (LUClusterVerifyConfig.ETYPE_ERROR,
1376             "Failed to load X509 certificate %s: %s" % (filename, err))
1377
1378   (errcode, msg) = \
1379     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1380                                 constants.SSL_CERT_EXPIRATION_ERROR)
1381
1382   if msg:
1383     fnamemsg = "While verifying %s: %s" % (filename, msg)
1384   else:
1385     fnamemsg = None
1386
1387   if errcode is None:
1388     return (None, fnamemsg)
1389   elif errcode == utils.CERT_WARNING:
1390     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1391   elif errcode == utils.CERT_ERROR:
1392     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1393
1394   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1395
1396
1397 def _GetAllHypervisorParameters(cluster, instances):
1398   """Compute the set of all hypervisor parameters.
1399
1400   @type cluster: L{objects.Cluster}
1401   @param cluster: the cluster object
1402   @param instances: list of L{objects.Instance}
1403   @param instances: additional instances from which to obtain parameters
1404   @rtype: list of (origin, hypervisor, parameters)
1405   @return: a list with all parameters found, indicating the hypervisor they
1406        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1407
1408   """
1409   hvp_data = []
1410
1411   for hv_name in cluster.enabled_hypervisors:
1412     hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1413
1414   for os_name, os_hvp in cluster.os_hvp.items():
1415     for hv_name, hv_params in os_hvp.items():
1416       if hv_params:
1417         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1418         hvp_data.append(("os %s" % os_name, hv_name, full_params))
1419
1420   # TODO: collapse identical parameter values in a single one
1421   for instance in instances:
1422     if instance.hvparams:
1423       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1424                        cluster.FillHV(instance)))
1425
1426   return hvp_data
1427
1428
1429 class _VerifyErrors(object):
1430   """Mix-in for cluster/group verify LUs.
1431
1432   It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1433   self.op and self._feedback_fn to be available.)
1434
1435   """
1436   TCLUSTER = "cluster"
1437   TNODE = "node"
1438   TINSTANCE = "instance"
1439
1440   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1441   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1442   ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1443   ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1444   ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1445   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1446   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1447   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1448   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1449   EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1450   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1451   EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1452   ENODEDRBD = (TNODE, "ENODEDRBD")
1453   ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1454   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1455   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1456   ENODEHV = (TNODE, "ENODEHV")
1457   ENODELVM = (TNODE, "ENODELVM")
1458   ENODEN1 = (TNODE, "ENODEN1")
1459   ENODENET = (TNODE, "ENODENET")
1460   ENODEOS = (TNODE, "ENODEOS")
1461   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1462   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1463   ENODERPC = (TNODE, "ENODERPC")
1464   ENODESSH = (TNODE, "ENODESSH")
1465   ENODEVERSION = (TNODE, "ENODEVERSION")
1466   ENODESETUP = (TNODE, "ENODESETUP")
1467   ENODETIME = (TNODE, "ENODETIME")
1468   ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1469
1470   ETYPE_FIELD = "code"
1471   ETYPE_ERROR = "ERROR"
1472   ETYPE_WARNING = "WARNING"
1473
1474   def _Error(self, ecode, item, msg, *args, **kwargs):
1475     """Format an error message.
1476
1477     Based on the opcode's error_codes parameter, either format a
1478     parseable error code, or a simpler error string.
1479
1480     This must be called only from Exec and functions called from Exec.
1481
1482     """
1483     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1484     itype, etxt = ecode
1485     # first complete the msg
1486     if args:
1487       msg = msg % args
1488     # then format the whole message
1489     if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1490       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1491     else:
1492       if item:
1493         item = " " + item
1494       else:
1495         item = ""
1496       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1497     # and finally report it via the feedback_fn
1498     self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1499
1500   def _ErrorIf(self, cond, *args, **kwargs):
1501     """Log an error message if the passed condition is True.
1502
1503     """
1504     cond = (bool(cond)
1505             or self.op.debug_simulate_errors) # pylint: disable=E1101
1506     if cond:
1507       self._Error(*args, **kwargs)
1508     # do not mark the operation as failed for WARN cases only
1509     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1510       self.bad = self.bad or cond
1511
1512
1513 class LUClusterVerify(NoHooksLU):
1514   """Submits all jobs necessary to verify the cluster.
1515
1516   """
1517   REQ_BGL = False
1518
1519   def ExpandNames(self):
1520     self.needed_locks = {}
1521
1522   def Exec(self, feedback_fn):
1523     jobs = []
1524
1525     if self.op.group_name:
1526       groups = [self.op.group_name]
1527       depends_fn = lambda: None
1528     else:
1529       groups = self.cfg.GetNodeGroupList()
1530
1531       # Verify global configuration
1532       jobs.append([opcodes.OpClusterVerifyConfig()])
1533
1534       # Always depend on global verification
1535       depends_fn = lambda: [(-len(jobs), [])]
1536
1537     jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1538                                               depends=depends_fn())]
1539                 for group in groups)
1540
1541     # Fix up all parameters
1542     for op in itertools.chain(*jobs): # pylint: disable=W0142
1543       op.debug_simulate_errors = self.op.debug_simulate_errors
1544       op.verbose = self.op.verbose
1545       op.error_codes = self.op.error_codes
1546       try:
1547         op.skip_checks = self.op.skip_checks
1548       except AttributeError:
1549         assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1550
1551     return ResultWithJobs(jobs)
1552
1553
1554 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1555   """Verifies the cluster config.
1556
1557   """
1558   REQ_BGL = True
1559
1560   def _VerifyHVP(self, hvp_data):
1561     """Verifies locally the syntax of the hypervisor parameters.
1562
1563     """
1564     for item, hv_name, hv_params in hvp_data:
1565       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1566              (item, hv_name))
1567       try:
1568         hv_class = hypervisor.GetHypervisor(hv_name)
1569         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1570         hv_class.CheckParameterSyntax(hv_params)
1571       except errors.GenericError, err:
1572         self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1573
1574   def ExpandNames(self):
1575     # Information can be safely retrieved as the BGL is acquired in exclusive
1576     # mode
1577     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1578     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1579     self.all_node_info = self.cfg.GetAllNodesInfo()
1580     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1581     self.needed_locks = {}
1582
1583   def Exec(self, feedback_fn):
1584     """Verify integrity of cluster, performing various test on nodes.
1585
1586     """
1587     self.bad = False
1588     self._feedback_fn = feedback_fn
1589
1590     feedback_fn("* Verifying cluster config")
1591
1592     for msg in self.cfg.VerifyConfig():
1593       self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1594
1595     feedback_fn("* Verifying cluster certificate files")
1596
1597     for cert_filename in constants.ALL_CERT_FILES:
1598       (errcode, msg) = _VerifyCertificate(cert_filename)
1599       self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1600
1601     feedback_fn("* Verifying hypervisor parameters")
1602
1603     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1604                                                 self.all_inst_info.values()))
1605
1606     feedback_fn("* Verifying all nodes belong to an existing group")
1607
1608     # We do this verification here because, should this bogus circumstance
1609     # occur, it would never be caught by VerifyGroup, which only acts on
1610     # nodes/instances reachable from existing node groups.
1611
1612     dangling_nodes = set(node.name for node in self.all_node_info.values()
1613                          if node.group not in self.all_group_info)
1614
1615     dangling_instances = {}
1616     no_node_instances = []
1617
1618     for inst in self.all_inst_info.values():
1619       if inst.primary_node in dangling_nodes:
1620         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1621       elif inst.primary_node not in self.all_node_info:
1622         no_node_instances.append(inst.name)
1623
1624     pretty_dangling = [
1625         "%s (%s)" %
1626         (node.name,
1627          utils.CommaJoin(dangling_instances.get(node.name,
1628                                                 ["no instances"])))
1629         for node in dangling_nodes]
1630
1631     self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1632                   "the following nodes (and their instances) belong to a non"
1633                   " existing group: %s", utils.CommaJoin(pretty_dangling))
1634
1635     self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1636                   "the following instances have a non-existing primary-node:"
1637                   " %s", utils.CommaJoin(no_node_instances))
1638
1639     return not self.bad
1640
1641
1642 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1643   """Verifies the status of a node group.
1644
1645   """
1646   HPATH = "cluster-verify"
1647   HTYPE = constants.HTYPE_CLUSTER
1648   REQ_BGL = False
1649
1650   _HOOKS_INDENT_RE = re.compile("^", re.M)
1651
1652   class NodeImage(object):
1653     """A class representing the logical and physical status of a node.
1654
1655     @type name: string
1656     @ivar name: the node name to which this object refers
1657     @ivar volumes: a structure as returned from
1658         L{ganeti.backend.GetVolumeList} (runtime)
1659     @ivar instances: a list of running instances (runtime)
1660     @ivar pinst: list of configured primary instances (config)
1661     @ivar sinst: list of configured secondary instances (config)
1662     @ivar sbp: dictionary of {primary-node: list of instances} for all
1663         instances for which this node is secondary (config)
1664     @ivar mfree: free memory, as reported by hypervisor (runtime)
1665     @ivar dfree: free disk, as reported by the node (runtime)
1666     @ivar offline: the offline status (config)
1667     @type rpc_fail: boolean
1668     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1669         not whether the individual keys were correct) (runtime)
1670     @type lvm_fail: boolean
1671     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1672     @type hyp_fail: boolean
1673     @ivar hyp_fail: whether the RPC call didn't return the instance list
1674     @type ghost: boolean
1675     @ivar ghost: whether this is a known node or not (config)
1676     @type os_fail: boolean
1677     @ivar os_fail: whether the RPC call didn't return valid OS data
1678     @type oslist: list
1679     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1680     @type vm_capable: boolean
1681     @ivar vm_capable: whether the node can host instances
1682
1683     """
1684     def __init__(self, offline=False, name=None, vm_capable=True):
1685       self.name = name
1686       self.volumes = {}
1687       self.instances = []
1688       self.pinst = []
1689       self.sinst = []
1690       self.sbp = {}
1691       self.mfree = 0
1692       self.dfree = 0
1693       self.offline = offline
1694       self.vm_capable = vm_capable
1695       self.rpc_fail = False
1696       self.lvm_fail = False
1697       self.hyp_fail = False
1698       self.ghost = False
1699       self.os_fail = False
1700       self.oslist = {}
1701
1702   def ExpandNames(self):
1703     # This raises errors.OpPrereqError on its own:
1704     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1705
1706     # Get instances in node group; this is unsafe and needs verification later
1707     inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1708
1709     self.needed_locks = {
1710       locking.LEVEL_INSTANCE: inst_names,
1711       locking.LEVEL_NODEGROUP: [self.group_uuid],
1712       locking.LEVEL_NODE: [],
1713       }
1714
1715     self.share_locks = _ShareAll()
1716
1717   def DeclareLocks(self, level):
1718     if level == locking.LEVEL_NODE:
1719       # Get members of node group; this is unsafe and needs verification later
1720       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1721
1722       all_inst_info = self.cfg.GetAllInstancesInfo()
1723
1724       # In Exec(), we warn about mirrored instances that have primary and
1725       # secondary living in separate node groups. To fully verify that
1726       # volumes for these instances are healthy, we will need to do an
1727       # extra call to their secondaries. We ensure here those nodes will
1728       # be locked.
1729       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1730         # Important: access only the instances whose lock is owned
1731         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1732           nodes.update(all_inst_info[inst].secondary_nodes)
1733
1734       self.needed_locks[locking.LEVEL_NODE] = nodes
1735
1736   def CheckPrereq(self):
1737     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1738     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1739
1740     group_nodes = set(self.group_info.members)
1741     group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1742
1743     unlocked_nodes = \
1744         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1745
1746     unlocked_instances = \
1747         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1748
1749     if unlocked_nodes:
1750       raise errors.OpPrereqError("Missing lock for nodes: %s" %
1751                                  utils.CommaJoin(unlocked_nodes))
1752
1753     if unlocked_instances:
1754       raise errors.OpPrereqError("Missing lock for instances: %s" %
1755                                  utils.CommaJoin(unlocked_instances))
1756
1757     self.all_node_info = self.cfg.GetAllNodesInfo()
1758     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1759
1760     self.my_node_names = utils.NiceSort(group_nodes)
1761     self.my_inst_names = utils.NiceSort(group_instances)
1762
1763     self.my_node_info = dict((name, self.all_node_info[name])
1764                              for name in self.my_node_names)
1765
1766     self.my_inst_info = dict((name, self.all_inst_info[name])
1767                              for name in self.my_inst_names)
1768
1769     # We detect here the nodes that will need the extra RPC calls for verifying
1770     # split LV volumes; they should be locked.
1771     extra_lv_nodes = set()
1772
1773     for inst in self.my_inst_info.values():
1774       if inst.disk_template in constants.DTS_INT_MIRROR:
1775         group = self.my_node_info[inst.primary_node].group
1776         for nname in inst.secondary_nodes:
1777           if self.all_node_info[nname].group != group:
1778             extra_lv_nodes.add(nname)
1779
1780     unlocked_lv_nodes = \
1781         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1782
1783     if unlocked_lv_nodes:
1784       raise errors.OpPrereqError("these nodes could be locked: %s" %
1785                                  utils.CommaJoin(unlocked_lv_nodes))
1786     self.extra_lv_nodes = list(extra_lv_nodes)
1787
1788   def _VerifyNode(self, ninfo, nresult):
1789     """Perform some basic validation on data returned from a node.
1790
1791       - check the result data structure is well formed and has all the
1792         mandatory fields
1793       - check ganeti version
1794
1795     @type ninfo: L{objects.Node}
1796     @param ninfo: the node to check
1797     @param nresult: the results from the node
1798     @rtype: boolean
1799     @return: whether overall this call was successful (and we can expect
1800          reasonable values in the respose)
1801
1802     """
1803     node = ninfo.name
1804     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1805
1806     # main result, nresult should be a non-empty dict
1807     test = not nresult or not isinstance(nresult, dict)
1808     _ErrorIf(test, self.ENODERPC, node,
1809                   "unable to verify node: no data returned")
1810     if test:
1811       return False
1812
1813     # compares ganeti version
1814     local_version = constants.PROTOCOL_VERSION
1815     remote_version = nresult.get("version", None)
1816     test = not (remote_version and
1817                 isinstance(remote_version, (list, tuple)) and
1818                 len(remote_version) == 2)
1819     _ErrorIf(test, self.ENODERPC, node,
1820              "connection to node returned invalid data")
1821     if test:
1822       return False
1823
1824     test = local_version != remote_version[0]
1825     _ErrorIf(test, self.ENODEVERSION, node,
1826              "incompatible protocol versions: master %s,"
1827              " node %s", local_version, remote_version[0])
1828     if test:
1829       return False
1830
1831     # node seems compatible, we can actually try to look into its results
1832
1833     # full package version
1834     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1835                   self.ENODEVERSION, node,
1836                   "software version mismatch: master %s, node %s",
1837                   constants.RELEASE_VERSION, remote_version[1],
1838                   code=self.ETYPE_WARNING)
1839
1840     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1841     if ninfo.vm_capable and isinstance(hyp_result, dict):
1842       for hv_name, hv_result in hyp_result.iteritems():
1843         test = hv_result is not None
1844         _ErrorIf(test, self.ENODEHV, node,
1845                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1846
1847     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1848     if ninfo.vm_capable and isinstance(hvp_result, list):
1849       for item, hv_name, hv_result in hvp_result:
1850         _ErrorIf(True, self.ENODEHV, node,
1851                  "hypervisor %s parameter verify failure (source %s): %s",
1852                  hv_name, item, hv_result)
1853
1854     test = nresult.get(constants.NV_NODESETUP,
1855                        ["Missing NODESETUP results"])
1856     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1857              "; ".join(test))
1858
1859     return True
1860
1861   def _VerifyNodeTime(self, ninfo, nresult,
1862                       nvinfo_starttime, nvinfo_endtime):
1863     """Check the node time.
1864
1865     @type ninfo: L{objects.Node}
1866     @param ninfo: the node to check
1867     @param nresult: the remote results for the node
1868     @param nvinfo_starttime: the start time of the RPC call
1869     @param nvinfo_endtime: the end time of the RPC call
1870
1871     """
1872     node = ninfo.name
1873     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1874
1875     ntime = nresult.get(constants.NV_TIME, None)
1876     try:
1877       ntime_merged = utils.MergeTime(ntime)
1878     except (ValueError, TypeError):
1879       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1880       return
1881
1882     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1883       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1884     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1885       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1886     else:
1887       ntime_diff = None
1888
1889     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1890              "Node time diverges by at least %s from master node time",
1891              ntime_diff)
1892
1893   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1894     """Check the node LVM results.
1895
1896     @type ninfo: L{objects.Node}
1897     @param ninfo: the node to check
1898     @param nresult: the remote results for the node
1899     @param vg_name: the configured VG name
1900
1901     """
1902     if vg_name is None:
1903       return
1904
1905     node = ninfo.name
1906     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1907
1908     # checks vg existence and size > 20G
1909     vglist = nresult.get(constants.NV_VGLIST, None)
1910     test = not vglist
1911     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1912     if not test:
1913       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1914                                             constants.MIN_VG_SIZE)
1915       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1916
1917     # check pv names
1918     pvlist = nresult.get(constants.NV_PVLIST, None)
1919     test = pvlist is None
1920     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1921     if not test:
1922       # check that ':' is not present in PV names, since it's a
1923       # special character for lvcreate (denotes the range of PEs to
1924       # use on the PV)
1925       for _, pvname, owner_vg in pvlist:
1926         test = ":" in pvname
1927         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1928                  " '%s' of VG '%s'", pvname, owner_vg)
1929
1930   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1931     """Check the node bridges.
1932
1933     @type ninfo: L{objects.Node}
1934     @param ninfo: the node to check
1935     @param nresult: the remote results for the node
1936     @param bridges: the expected list of bridges
1937
1938     """
1939     if not bridges:
1940       return
1941
1942     node = ninfo.name
1943     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1944
1945     missing = nresult.get(constants.NV_BRIDGES, None)
1946     test = not isinstance(missing, list)
1947     _ErrorIf(test, self.ENODENET, node,
1948              "did not return valid bridge information")
1949     if not test:
1950       _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1951                utils.CommaJoin(sorted(missing)))
1952
1953   def _VerifyNodeNetwork(self, ninfo, nresult):
1954     """Check the node network connectivity results.
1955
1956     @type ninfo: L{objects.Node}
1957     @param ninfo: the node to check
1958     @param nresult: the remote results for the node
1959
1960     """
1961     node = ninfo.name
1962     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1963
1964     test = constants.NV_NODELIST not in nresult
1965     _ErrorIf(test, self.ENODESSH, node,
1966              "node hasn't returned node ssh connectivity data")
1967     if not test:
1968       if nresult[constants.NV_NODELIST]:
1969         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1970           _ErrorIf(True, self.ENODESSH, node,
1971                    "ssh communication with node '%s': %s", a_node, a_msg)
1972
1973     test = constants.NV_NODENETTEST not in nresult
1974     _ErrorIf(test, self.ENODENET, node,
1975              "node hasn't returned node tcp connectivity data")
1976     if not test:
1977       if nresult[constants.NV_NODENETTEST]:
1978         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1979         for anode in nlist:
1980           _ErrorIf(True, self.ENODENET, node,
1981                    "tcp communication with node '%s': %s",
1982                    anode, nresult[constants.NV_NODENETTEST][anode])
1983
1984     test = constants.NV_MASTERIP not in nresult
1985     _ErrorIf(test, self.ENODENET, node,
1986              "node hasn't returned node master IP reachability data")
1987     if not test:
1988       if not nresult[constants.NV_MASTERIP]:
1989         if node == self.master_node:
1990           msg = "the master node cannot reach the master IP (not configured?)"
1991         else:
1992           msg = "cannot reach the master IP"
1993         _ErrorIf(True, self.ENODENET, node, msg)
1994
1995   def _VerifyInstance(self, instance, instanceconfig, node_image,
1996                       diskstatus):
1997     """Verify an instance.
1998
1999     This function checks to see if the required block devices are
2000     available on the instance's node.
2001
2002     """
2003     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2004     node_current = instanceconfig.primary_node
2005
2006     node_vol_should = {}
2007     instanceconfig.MapLVsByNode(node_vol_should)
2008
2009     for node in node_vol_should:
2010       n_img = node_image[node]
2011       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2012         # ignore missing volumes on offline or broken nodes
2013         continue
2014       for volume in node_vol_should[node]:
2015         test = volume not in n_img.volumes
2016         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
2017                  "volume %s missing on node %s", volume, node)
2018
2019     if instanceconfig.admin_up:
2020       pri_img = node_image[node_current]
2021       test = instance not in pri_img.instances and not pri_img.offline
2022       _ErrorIf(test, self.EINSTANCEDOWN, instance,
2023                "instance not running on its primary node %s",
2024                node_current)
2025
2026     diskdata = [(nname, success, status, idx)
2027                 for (nname, disks) in diskstatus.items()
2028                 for idx, (success, status) in enumerate(disks)]
2029
2030     for nname, success, bdev_status, idx in diskdata:
2031       # the 'ghost node' construction in Exec() ensures that we have a
2032       # node here
2033       snode = node_image[nname]
2034       bad_snode = snode.ghost or snode.offline
2035       _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2036                self.EINSTANCEFAULTYDISK, instance,
2037                "couldn't retrieve status for disk/%s on %s: %s",
2038                idx, nname, bdev_status)
2039       _ErrorIf((instanceconfig.admin_up and success and
2040                 bdev_status.ldisk_status == constants.LDS_FAULTY),
2041                self.EINSTANCEFAULTYDISK, instance,
2042                "disk/%s on %s is faulty", idx, nname)
2043
2044   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2045     """Verify if there are any unknown volumes in the cluster.
2046
2047     The .os, .swap and backup volumes are ignored. All other volumes are
2048     reported as unknown.
2049
2050     @type reserved: L{ganeti.utils.FieldSet}
2051     @param reserved: a FieldSet of reserved volume names
2052
2053     """
2054     for node, n_img in node_image.items():
2055       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2056         # skip non-healthy nodes
2057         continue
2058       for volume in n_img.volumes:
2059         test = ((node not in node_vol_should or
2060                 volume not in node_vol_should[node]) and
2061                 not reserved.Matches(volume))
2062         self._ErrorIf(test, self.ENODEORPHANLV, node,
2063                       "volume %s is unknown", volume)
2064
2065   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2066     """Verify N+1 Memory Resilience.
2067
2068     Check that if one single node dies we can still start all the
2069     instances it was primary for.
2070
2071     """
2072     cluster_info = self.cfg.GetClusterInfo()
2073     for node, n_img in node_image.items():
2074       # This code checks that every node which is now listed as
2075       # secondary has enough memory to host all instances it is
2076       # supposed to should a single other node in the cluster fail.
2077       # FIXME: not ready for failover to an arbitrary node
2078       # FIXME: does not support file-backed instances
2079       # WARNING: we currently take into account down instances as well
2080       # as up ones, considering that even if they're down someone
2081       # might want to start them even in the event of a node failure.
2082       if n_img.offline:
2083         # we're skipping offline nodes from the N+1 warning, since
2084         # most likely we don't have good memory infromation from them;
2085         # we already list instances living on such nodes, and that's
2086         # enough warning
2087         continue
2088       for prinode, instances in n_img.sbp.items():
2089         needed_mem = 0
2090         for instance in instances:
2091           bep = cluster_info.FillBE(instance_cfg[instance])
2092           if bep[constants.BE_AUTO_BALANCE]:
2093             needed_mem += bep[constants.BE_MEMORY]
2094         test = n_img.mfree < needed_mem
2095         self._ErrorIf(test, self.ENODEN1, node,
2096                       "not enough memory to accomodate instance failovers"
2097                       " should node %s fail (%dMiB needed, %dMiB available)",
2098                       prinode, needed_mem, n_img.mfree)
2099
2100   @classmethod
2101   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2102                    (files_all, files_all_opt, files_mc, files_vm)):
2103     """Verifies file checksums collected from all nodes.
2104
2105     @param errorif: Callback for reporting errors
2106     @param nodeinfo: List of L{objects.Node} objects
2107     @param master_node: Name of master node
2108     @param all_nvinfo: RPC results
2109
2110     """
2111     node_names = frozenset(node.name for node in nodeinfo if not node.offline)
2112
2113     assert master_node in node_names
2114     assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2115             sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2116            "Found file listed in more than one file list"
2117
2118     # Define functions determining which nodes to consider for a file
2119     file2nodefn = dict([(filename, fn)
2120       for (files, fn) in [(files_all, None),
2121                           (files_all_opt, None),
2122                           (files_mc, lambda node: (node.master_candidate or
2123                                                    node.name == master_node)),
2124                           (files_vm, lambda node: node.vm_capable)]
2125       for filename in files])
2126
2127     fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
2128
2129     for node in nodeinfo:
2130       if node.offline:
2131         continue
2132
2133       nresult = all_nvinfo[node.name]
2134
2135       if nresult.fail_msg or not nresult.payload:
2136         node_files = None
2137       else:
2138         node_files = nresult.payload.get(constants.NV_FILELIST, None)
2139
2140       test = not (node_files and isinstance(node_files, dict))
2141       errorif(test, cls.ENODEFILECHECK, node.name,
2142               "Node did not return file checksum data")
2143       if test:
2144         continue
2145
2146       for (filename, checksum) in node_files.items():
2147         # Check if the file should be considered for a node
2148         fn = file2nodefn[filename]
2149         if fn is None or fn(node):
2150           fileinfo[filename].setdefault(checksum, set()).add(node.name)
2151
2152     for (filename, checksums) in fileinfo.items():
2153       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2154
2155       # Nodes having the file
2156       with_file = frozenset(node_name
2157                             for nodes in fileinfo[filename].values()
2158                             for node_name in nodes)
2159
2160       # Nodes missing file
2161       missing_file = node_names - with_file
2162
2163       if filename in files_all_opt:
2164         # All or no nodes
2165         errorif(missing_file and missing_file != node_names,
2166                 cls.ECLUSTERFILECHECK, None,
2167                 "File %s is optional, but it must exist on all or no"
2168                 " nodes (not found on %s)",
2169                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2170       else:
2171         errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2172                 "File %s is missing from node(s) %s", filename,
2173                 utils.CommaJoin(utils.NiceSort(missing_file)))
2174
2175       # See if there are multiple versions of the file
2176       test = len(checksums) > 1
2177       if test:
2178         variants = ["variant %s on %s" %
2179                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2180                     for (idx, (checksum, nodes)) in
2181                       enumerate(sorted(checksums.items()))]
2182       else:
2183         variants = []
2184
2185       errorif(test, cls.ECLUSTERFILECHECK, None,
2186               "File %s found with %s different checksums (%s)",
2187               filename, len(checksums), "; ".join(variants))
2188
2189   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2190                       drbd_map):
2191     """Verifies and the node DRBD status.
2192
2193     @type ninfo: L{objects.Node}
2194     @param ninfo: the node to check
2195     @param nresult: the remote results for the node
2196     @param instanceinfo: the dict of instances
2197     @param drbd_helper: the configured DRBD usermode helper
2198     @param drbd_map: the DRBD map as returned by
2199         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2200
2201     """
2202     node = ninfo.name
2203     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2204
2205     if drbd_helper:
2206       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2207       test = (helper_result == None)
2208       _ErrorIf(test, self.ENODEDRBDHELPER, node,
2209                "no drbd usermode helper returned")
2210       if helper_result:
2211         status, payload = helper_result
2212         test = not status
2213         _ErrorIf(test, self.ENODEDRBDHELPER, node,
2214                  "drbd usermode helper check unsuccessful: %s", payload)
2215         test = status and (payload != drbd_helper)
2216         _ErrorIf(test, self.ENODEDRBDHELPER, node,
2217                  "wrong drbd usermode helper: %s", payload)
2218
2219     # compute the DRBD minors
2220     node_drbd = {}
2221     for minor, instance in drbd_map[node].items():
2222       test = instance not in instanceinfo
2223       _ErrorIf(test, self.ECLUSTERCFG, None,
2224                "ghost instance '%s' in temporary DRBD map", instance)
2225         # ghost instance should not be running, but otherwise we
2226         # don't give double warnings (both ghost instance and
2227         # unallocated minor in use)
2228       if test:
2229         node_drbd[minor] = (instance, False)
2230       else:
2231         instance = instanceinfo[instance]
2232         node_drbd[minor] = (instance.name, instance.admin_up)
2233
2234     # and now check them
2235     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2236     test = not isinstance(used_minors, (tuple, list))
2237     _ErrorIf(test, self.ENODEDRBD, node,
2238              "cannot parse drbd status file: %s", str(used_minors))
2239     if test:
2240       # we cannot check drbd status
2241       return
2242
2243     for minor, (iname, must_exist) in node_drbd.items():
2244       test = minor not in used_minors and must_exist
2245       _ErrorIf(test, self.ENODEDRBD, node,
2246                "drbd minor %d of instance %s is not active", minor, iname)
2247     for minor in used_minors:
2248       test = minor not in node_drbd
2249       _ErrorIf(test, self.ENODEDRBD, node,
2250                "unallocated drbd minor %d is in use", minor)
2251
2252   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2253     """Builds the node OS structures.
2254
2255     @type ninfo: L{objects.Node}
2256     @param ninfo: the node to check
2257     @param nresult: the remote results for the node
2258     @param nimg: the node image object
2259
2260     """
2261     node = ninfo.name
2262     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2263
2264     remote_os = nresult.get(constants.NV_OSLIST, None)
2265     test = (not isinstance(remote_os, list) or
2266             not compat.all(isinstance(v, list) and len(v) == 7
2267                            for v in remote_os))
2268
2269     _ErrorIf(test, self.ENODEOS, node,
2270              "node hasn't returned valid OS data")
2271
2272     nimg.os_fail = test
2273
2274     if test:
2275       return
2276
2277     os_dict = {}
2278
2279     for (name, os_path, status, diagnose,
2280          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2281
2282       if name not in os_dict:
2283         os_dict[name] = []
2284
2285       # parameters is a list of lists instead of list of tuples due to
2286       # JSON lacking a real tuple type, fix it:
2287       parameters = [tuple(v) for v in parameters]
2288       os_dict[name].append((os_path, status, diagnose,
2289                             set(variants), set(parameters), set(api_ver)))
2290
2291     nimg.oslist = os_dict
2292
2293   def _VerifyNodeOS(self, ninfo, nimg, base):
2294     """Verifies the node OS list.
2295
2296     @type ninfo: L{objects.Node}
2297     @param ninfo: the node to check
2298     @param nimg: the node image object
2299     @param base: the 'template' node we match against (e.g. from the master)
2300
2301     """
2302     node = ninfo.name
2303     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2304
2305     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2306
2307     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2308     for os_name, os_data in nimg.oslist.items():
2309       assert os_data, "Empty OS status for OS %s?!" % os_name
2310       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2311       _ErrorIf(not f_status, self.ENODEOS, node,
2312                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2313       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2314                "OS '%s' has multiple entries (first one shadows the rest): %s",
2315                os_name, utils.CommaJoin([v[0] for v in os_data]))
2316       # comparisons with the 'base' image
2317       test = os_name not in base.oslist
2318       _ErrorIf(test, self.ENODEOS, node,
2319                "Extra OS %s not present on reference node (%s)",
2320                os_name, base.name)
2321       if test:
2322         continue
2323       assert base.oslist[os_name], "Base node has empty OS status?"
2324       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2325       if not b_status:
2326         # base OS is invalid, skipping
2327         continue
2328       for kind, a, b in [("API version", f_api, b_api),
2329                          ("variants list", f_var, b_var),
2330                          ("parameters", beautify_params(f_param),
2331                           beautify_params(b_param))]:
2332         _ErrorIf(a != b, self.ENODEOS, node,
2333                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2334                  kind, os_name, base.name,
2335                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2336
2337     # check any missing OSes
2338     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2339     _ErrorIf(missing, self.ENODEOS, node,
2340              "OSes present on reference node %s but missing on this node: %s",
2341              base.name, utils.CommaJoin(missing))
2342
2343   def _VerifyOob(self, ninfo, nresult):
2344     """Verifies out of band functionality of a node.
2345
2346     @type ninfo: L{objects.Node}
2347     @param ninfo: the node to check
2348     @param nresult: the remote results for the node
2349
2350     """
2351     node = ninfo.name
2352     # We just have to verify the paths on master and/or master candidates
2353     # as the oob helper is invoked on the master
2354     if ((ninfo.master_candidate or ninfo.master_capable) and
2355         constants.NV_OOB_PATHS in nresult):
2356       for path_result in nresult[constants.NV_OOB_PATHS]:
2357         self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2358
2359   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2360     """Verifies and updates the node volume data.
2361
2362     This function will update a L{NodeImage}'s internal structures
2363     with data from the remote call.
2364
2365     @type ninfo: L{objects.Node}
2366     @param ninfo: the node to check
2367     @param nresult: the remote results for the node
2368     @param nimg: the node image object
2369     @param vg_name: the configured VG name
2370
2371     """
2372     node = ninfo.name
2373     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2374
2375     nimg.lvm_fail = True
2376     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2377     if vg_name is None:
2378       pass
2379     elif isinstance(lvdata, basestring):
2380       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2381                utils.SafeEncode(lvdata))
2382     elif not isinstance(lvdata, dict):
2383       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2384     else:
2385       nimg.volumes = lvdata
2386       nimg.lvm_fail = False
2387
2388   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2389     """Verifies and updates the node instance list.
2390
2391     If the listing was successful, then updates this node's instance
2392     list. Otherwise, it marks the RPC call as failed for the instance
2393     list key.
2394
2395     @type ninfo: L{objects.Node}
2396     @param ninfo: the node to check
2397     @param nresult: the remote results for the node
2398     @param nimg: the node image object
2399
2400     """
2401     idata = nresult.get(constants.NV_INSTANCELIST, None)
2402     test = not isinstance(idata, list)
2403     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2404                   " (instancelist): %s", utils.SafeEncode(str(idata)))
2405     if test:
2406       nimg.hyp_fail = True
2407     else:
2408       nimg.instances = idata
2409
2410   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2411     """Verifies and computes a node information map
2412
2413     @type ninfo: L{objects.Node}
2414     @param ninfo: the node to check
2415     @param nresult: the remote results for the node
2416     @param nimg: the node image object
2417     @param vg_name: the configured VG name
2418
2419     """
2420     node = ninfo.name
2421     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2422
2423     # try to read free memory (from the hypervisor)
2424     hv_info = nresult.get(constants.NV_HVINFO, None)
2425     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2426     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2427     if not test:
2428       try:
2429         nimg.mfree = int(hv_info["memory_free"])
2430       except (ValueError, TypeError):
2431         _ErrorIf(True, self.ENODERPC, node,
2432                  "node returned invalid nodeinfo, check hypervisor")
2433
2434     # FIXME: devise a free space model for file based instances as well
2435     if vg_name is not None:
2436       test = (constants.NV_VGLIST not in nresult or
2437               vg_name not in nresult[constants.NV_VGLIST])
2438       _ErrorIf(test, self.ENODELVM, node,
2439                "node didn't return data for the volume group '%s'"
2440                " - it is either missing or broken", vg_name)
2441       if not test:
2442         try:
2443           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2444         except (ValueError, TypeError):
2445           _ErrorIf(True, self.ENODERPC, node,
2446                    "node returned invalid LVM info, check LVM status")
2447
2448   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2449     """Gets per-disk status information for all instances.
2450
2451     @type nodelist: list of strings
2452     @param nodelist: Node names
2453     @type node_image: dict of (name, L{objects.Node})
2454     @param node_image: Node objects
2455     @type instanceinfo: dict of (name, L{objects.Instance})
2456     @param instanceinfo: Instance objects
2457     @rtype: {instance: {node: [(succes, payload)]}}
2458     @return: a dictionary of per-instance dictionaries with nodes as
2459         keys and disk information as values; the disk information is a
2460         list of tuples (success, payload)
2461
2462     """
2463     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2464
2465     node_disks = {}
2466     node_disks_devonly = {}
2467     diskless_instances = set()
2468     diskless = constants.DT_DISKLESS
2469
2470     for nname in nodelist:
2471       node_instances = list(itertools.chain(node_image[nname].pinst,
2472                                             node_image[nname].sinst))
2473       diskless_instances.update(inst for inst in node_instances
2474                                 if instanceinfo[inst].disk_template == diskless)
2475       disks = [(inst, disk)
2476                for inst in node_instances
2477                for disk in instanceinfo[inst].disks]
2478
2479       if not disks:
2480         # No need to collect data
2481         continue
2482
2483       node_disks[nname] = disks
2484
2485       # Creating copies as SetDiskID below will modify the objects and that can
2486       # lead to incorrect data returned from nodes
2487       devonly = [dev.Copy() for (_, dev) in disks]
2488
2489       for dev in devonly:
2490         self.cfg.SetDiskID(dev, nname)
2491
2492       node_disks_devonly[nname] = devonly
2493
2494     assert len(node_disks) == len(node_disks_devonly)
2495
2496     # Collect data from all nodes with disks
2497     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2498                                                           node_disks_devonly)
2499
2500     assert len(result) == len(node_disks)
2501
2502     instdisk = {}
2503
2504     for (nname, nres) in result.items():
2505       disks = node_disks[nname]
2506
2507       if nres.offline:
2508         # No data from this node
2509         data = len(disks) * [(False, "node offline")]
2510       else:
2511         msg = nres.fail_msg
2512         _ErrorIf(msg, self.ENODERPC, nname,
2513                  "while getting disk information: %s", msg)
2514         if msg:
2515           # No data from this node
2516           data = len(disks) * [(False, msg)]
2517         else:
2518           data = []
2519           for idx, i in enumerate(nres.payload):
2520             if isinstance(i, (tuple, list)) and len(i) == 2:
2521               data.append(i)
2522             else:
2523               logging.warning("Invalid result from node %s, entry %d: %s",
2524                               nname, idx, i)
2525               data.append((False, "Invalid result from the remote node"))
2526
2527       for ((inst, _), status) in zip(disks, data):
2528         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2529
2530     # Add empty entries for diskless instances.
2531     for inst in diskless_instances:
2532       assert inst not in instdisk
2533       instdisk[inst] = {}
2534
2535     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2536                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2537                       compat.all(isinstance(s, (tuple, list)) and
2538                                  len(s) == 2 for s in statuses)
2539                       for inst, nnames in instdisk.items()
2540                       for nname, statuses in nnames.items())
2541     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2542
2543     return instdisk
2544
2545   def BuildHooksEnv(self):
2546     """Build hooks env.
2547
2548     Cluster-Verify hooks just ran in the post phase and their failure makes
2549     the output be logged in the verify output and the verification to fail.
2550
2551     """
2552     env = {
2553       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2554       }
2555
2556     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2557                for node in self.my_node_info.values())
2558
2559     return env
2560
2561   def BuildHooksNodes(self):
2562     """Build hooks nodes.
2563
2564     """
2565     return ([], self.my_node_names)
2566
2567   def Exec(self, feedback_fn):
2568     """Verify integrity of the node group, performing various test on nodes.
2569
2570     """
2571     # This method has too many local variables. pylint: disable=R0914
2572     feedback_fn("* Verifying group '%s'" % self.group_info.name)
2573
2574     if not self.my_node_names:
2575       # empty node group
2576       feedback_fn("* Empty node group, skipping verification")
2577       return True
2578
2579     self.bad = False
2580     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2581     verbose = self.op.verbose
2582     self._feedback_fn = feedback_fn
2583
2584     vg_name = self.cfg.GetVGName()
2585     drbd_helper = self.cfg.GetDRBDHelper()
2586     cluster = self.cfg.GetClusterInfo()
2587     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2588     hypervisors = cluster.enabled_hypervisors
2589     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2590
2591     i_non_redundant = [] # Non redundant instances
2592     i_non_a_balanced = [] # Non auto-balanced instances
2593     n_offline = 0 # Count of offline nodes
2594     n_drained = 0 # Count of nodes being drained
2595     node_vol_should = {}
2596
2597     # FIXME: verify OS list
2598
2599     # File verification
2600     filemap = _ComputeAncillaryFiles(cluster, False)
2601
2602     # do local checksums
2603     master_node = self.master_node = self.cfg.GetMasterNode()
2604     master_ip = self.cfg.GetMasterIP()
2605
2606     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2607
2608     # We will make nodes contact all nodes in their group, and one node from
2609     # every other group.
2610     # TODO: should it be a *random* node, different every time?
2611     online_nodes = [node.name for node in node_data_list if not node.offline]
2612     other_group_nodes = {}
2613
2614     for name in sorted(self.all_node_info):
2615       node = self.all_node_info[name]
2616       if (node.group not in other_group_nodes
2617           and node.group != self.group_uuid
2618           and not node.offline):
2619         other_group_nodes[node.group] = node.name
2620
2621     node_verify_param = {
2622       constants.NV_FILELIST:
2623         utils.UniqueSequence(filename
2624                              for files in filemap
2625                              for filename in files),
2626       constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
2627       constants.NV_HYPERVISOR: hypervisors,
2628       constants.NV_HVPARAMS:
2629         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2630       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2631                                  for node in node_data_list
2632                                  if not node.offline],
2633       constants.NV_INSTANCELIST: hypervisors,
2634       constants.NV_VERSION: None,
2635       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2636       constants.NV_NODESETUP: None,
2637       constants.NV_TIME: None,
2638       constants.NV_MASTERIP: (master_node, master_ip),
2639       constants.NV_OSLIST: None,
2640       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2641       }
2642
2643     if vg_name is not None:
2644       node_verify_param[constants.NV_VGLIST] = None
2645       node_verify_param[constants.NV_LVLIST] = vg_name
2646       node_verify_param[constants.NV_PVLIST] = [vg_name]
2647       node_verify_param[constants.NV_DRBDLIST] = None
2648
2649     if drbd_helper:
2650       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2651
2652     # bridge checks
2653     # FIXME: this needs to be changed per node-group, not cluster-wide
2654     bridges = set()
2655     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2656     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2657       bridges.add(default_nicpp[constants.NIC_LINK])
2658     for instance in self.my_inst_info.values():
2659       for nic in instance.nics:
2660         full_nic = cluster.SimpleFillNIC(nic.nicparams)
2661         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2662           bridges.add(full_nic[constants.NIC_LINK])
2663
2664     if bridges:
2665       node_verify_param[constants.NV_BRIDGES] = list(bridges)
2666
2667     # Build our expected cluster state
2668     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2669                                                  name=node.name,
2670                                                  vm_capable=node.vm_capable))
2671                       for node in node_data_list)
2672
2673     # Gather OOB paths
2674     oob_paths = []
2675     for node in self.all_node_info.values():
2676       path = _SupportsOob(self.cfg, node)
2677       if path and path not in oob_paths:
2678         oob_paths.append(path)
2679
2680     if oob_paths:
2681       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2682
2683     for instance in self.my_inst_names:
2684       inst_config = self.my_inst_info[instance]
2685
2686       for nname in inst_config.all_nodes:
2687         if nname not in node_image:
2688           gnode = self.NodeImage(name=nname)
2689           gnode.ghost = (nname not in self.all_node_info)
2690           node_image[nname] = gnode
2691
2692       inst_config.MapLVsByNode(node_vol_should)
2693
2694       pnode = inst_config.primary_node
2695       node_image[pnode].pinst.append(instance)
2696
2697       for snode in inst_config.secondary_nodes:
2698         nimg = node_image[snode]
2699         nimg.sinst.append(instance)
2700         if pnode not in nimg.sbp:
2701           nimg.sbp[pnode] = []
2702         nimg.sbp[pnode].append(instance)
2703
2704     # At this point, we have the in-memory data structures complete,
2705     # except for the runtime information, which we'll gather next
2706
2707     # Due to the way our RPC system works, exact response times cannot be
2708     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2709     # time before and after executing the request, we can at least have a time
2710     # window.
2711     nvinfo_starttime = time.time()
2712     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2713                                            node_verify_param,
2714                                            self.cfg.GetClusterName())
2715     nvinfo_endtime = time.time()
2716
2717     if self.extra_lv_nodes and vg_name is not None:
2718       extra_lv_nvinfo = \
2719           self.rpc.call_node_verify(self.extra_lv_nodes,
2720                                     {constants.NV_LVLIST: vg_name},
2721                                     self.cfg.GetClusterName())
2722     else:
2723       extra_lv_nvinfo = {}
2724
2725     all_drbd_map = self.cfg.ComputeDRBDMap()
2726
2727     feedback_fn("* Gathering disk information (%s nodes)" %
2728                 len(self.my_node_names))
2729     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2730                                      self.my_inst_info)
2731
2732     feedback_fn("* Verifying configuration file consistency")
2733
2734     # If not all nodes are being checked, we need to make sure the master node
2735     # and a non-checked vm_capable node are in the list.
2736     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2737     if absent_nodes:
2738       vf_nvinfo = all_nvinfo.copy()
2739       vf_node_info = list(self.my_node_info.values())
2740       additional_nodes = []
2741       if master_node not in self.my_node_info:
2742         additional_nodes.append(master_node)
2743         vf_node_info.append(self.all_node_info[master_node])
2744       # Add the first vm_capable node we find which is not included
2745       for node in absent_nodes:
2746         nodeinfo = self.all_node_info[node]
2747         if nodeinfo.vm_capable and not nodeinfo.offline:
2748           additional_nodes.append(node)
2749           vf_node_info.append(self.all_node_info[node])
2750           break
2751       key = constants.NV_FILELIST
2752       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2753                                                  {key: node_verify_param[key]},
2754                                                  self.cfg.GetClusterName()))
2755     else:
2756       vf_nvinfo = all_nvinfo
2757       vf_node_info = self.my_node_info.values()
2758
2759     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2760
2761     feedback_fn("* Verifying node status")
2762
2763     refos_img = None
2764
2765     for node_i in node_data_list:
2766       node = node_i.name
2767       nimg = node_image[node]
2768
2769       if node_i.offline:
2770         if verbose:
2771           feedback_fn("* Skipping offline node %s" % (node,))
2772         n_offline += 1
2773         continue
2774
2775       if node == master_node:
2776         ntype = "master"
2777       elif node_i.master_candidate:
2778         ntype = "master candidate"
2779       elif node_i.drained:
2780         ntype = "drained"
2781         n_drained += 1
2782       else:
2783         ntype = "regular"
2784       if verbose:
2785         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2786
2787       msg = all_nvinfo[node].fail_msg
2788       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2789       if msg:
2790         nimg.rpc_fail = True
2791         continue
2792
2793       nresult = all_nvinfo[node].payload
2794
2795       nimg.call_ok = self._VerifyNode(node_i, nresult)
2796       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2797       self._VerifyNodeNetwork(node_i, nresult)
2798       self._VerifyOob(node_i, nresult)
2799
2800       if nimg.vm_capable:
2801         self._VerifyNodeLVM(node_i, nresult, vg_name)
2802         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2803                              all_drbd_map)
2804
2805         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2806         self._UpdateNodeInstances(node_i, nresult, nimg)
2807         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2808         self._UpdateNodeOS(node_i, nresult, nimg)
2809
2810         if not nimg.os_fail:
2811           if refos_img is None:
2812             refos_img = nimg
2813           self._VerifyNodeOS(node_i, nimg, refos_img)
2814         self._VerifyNodeBridges(node_i, nresult, bridges)
2815
2816         # Check whether all running instancies are primary for the node. (This
2817         # can no longer be done from _VerifyInstance below, since some of the
2818         # wrong instances could be from other node groups.)
2819         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2820
2821         for inst in non_primary_inst:
2822           test = inst in self.all_inst_info
2823           _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2824                    "instance should not run on node %s", node_i.name)
2825           _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2826                    "node is running unknown instance %s", inst)
2827
2828     for node, result in extra_lv_nvinfo.items():
2829       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2830                               node_image[node], vg_name)
2831
2832     feedback_fn("* Verifying instance status")
2833     for instance in self.my_inst_names:
2834       if verbose:
2835         feedback_fn("* Verifying instance %s" % instance)
2836       inst_config = self.my_inst_info[instance]
2837       self._VerifyInstance(instance, inst_config, node_image,
2838                            instdisk[instance])
2839       inst_nodes_offline = []
2840
2841       pnode = inst_config.primary_node
2842       pnode_img = node_image[pnode]
2843       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2844                self.ENODERPC, pnode, "instance %s, connection to"
2845                " primary node failed", instance)
2846
2847       _ErrorIf(inst_config.admin_up and pnode_img.offline,
2848                self.EINSTANCEBADNODE, instance,
2849                "instance is marked as running and lives on offline node %s",
2850                inst_config.primary_node)
2851
2852       # If the instance is non-redundant we cannot survive losing its primary
2853       # node, so we are not N+1 compliant. On the other hand we have no disk
2854       # templates with more than one secondary so that situation is not well
2855       # supported either.
2856       # FIXME: does not support file-backed instances
2857       if not inst_config.secondary_nodes:
2858         i_non_redundant.append(instance)
2859
2860       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2861                instance, "instance has multiple secondary nodes: %s",
2862                utils.CommaJoin(inst_config.secondary_nodes),
2863                code=self.ETYPE_WARNING)
2864
2865       if inst_config.disk_template in constants.DTS_INT_MIRROR:
2866         pnode = inst_config.primary_node
2867         instance_nodes = utils.NiceSort(inst_config.all_nodes)
2868         instance_groups = {}
2869
2870         for node in instance_nodes:
2871           instance_groups.setdefault(self.all_node_info[node].group,
2872                                      []).append(node)
2873
2874         pretty_list = [
2875           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2876           # Sort so that we always list the primary node first.
2877           for group, nodes in sorted(instance_groups.items(),
2878                                      key=lambda (_, nodes): pnode in nodes,
2879                                      reverse=True)]
2880
2881         self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2882                       instance, "instance has primary and secondary nodes in"
2883                       " different groups: %s", utils.CommaJoin(pretty_list),
2884                       code=self.ETYPE_WARNING)
2885
2886       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2887         i_non_a_balanced.append(instance)
2888
2889       for snode in inst_config.secondary_nodes:
2890         s_img = node_image[snode]
2891         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2892                  "instance %s, connection to secondary node failed", instance)
2893
2894         if s_img.offline:
2895           inst_nodes_offline.append(snode)
2896
2897       # warn that the instance lives on offline nodes
2898       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2899                "instance has offline secondary node(s) %s",
2900                utils.CommaJoin(inst_nodes_offline))
2901       # ... or ghost/non-vm_capable nodes
2902       for node in inst_config.all_nodes:
2903         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2904                  "instance lives on ghost node %s", node)
2905         _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2906                  instance, "instance lives on non-vm_capable node %s", node)
2907
2908     feedback_fn("* Verifying orphan volumes")
2909     reserved = utils.FieldSet(*cluster.reserved_lvs)
2910
2911     # We will get spurious "unknown volume" warnings if any node of this group
2912     # is secondary for an instance whose primary is in another group. To avoid
2913     # them, we find these instances and add their volumes to node_vol_should.
2914     for inst in self.all_inst_info.values():
2915       for secondary in inst.secondary_nodes:
2916         if (secondary in self.my_node_info
2917             and inst.name not in self.my_inst_info):
2918           inst.MapLVsByNode(node_vol_should)
2919           break
2920
2921     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2922
2923     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2924       feedback_fn("* Verifying N+1 Memory redundancy")
2925       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2926
2927     feedback_fn("* Other Notes")
2928     if i_non_redundant:
2929       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2930                   % len(i_non_redundant))
2931
2932     if i_non_a_balanced:
2933       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2934                   % len(i_non_a_balanced))
2935
2936     if n_offline:
2937       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2938
2939     if n_drained:
2940       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2941
2942     return not self.bad
2943
2944   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2945     """Analyze the post-hooks' result
2946
2947     This method analyses the hook result, handles it, and sends some
2948     nicely-formatted feedback back to the user.
2949
2950     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2951         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2952     @param hooks_results: the results of the multi-node hooks rpc call
2953     @param feedback_fn: function used send feedback back to the caller
2954     @param lu_result: previous Exec result
2955     @return: the new Exec result, based on the previous result
2956         and hook results
2957
2958     """
2959     # We only really run POST phase hooks, only for non-empty groups,
2960     # and are only interested in their results
2961     if not self.my_node_names:
2962       # empty node group
2963       pass
2964     elif phase == constants.HOOKS_PHASE_POST:
2965       # Used to change hooks' output to proper indentation
2966       feedback_fn("* Hooks Results")
2967       assert hooks_results, "invalid result from hooks"
2968
2969       for node_name in hooks_results:
2970         res = hooks_results[node_name]
2971         msg = res.fail_msg
2972         test = msg and not res.offline
2973         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2974                       "Communication failure in hooks execution: %s", msg)
2975         if res.offline or msg:
2976           # No need to investigate payload if node is offline or gave an error.
2977           # override manually lu_result here as _ErrorIf only
2978           # overrides self.bad
2979           lu_result = 1
2980           continue
2981         for script, hkr, output in res.payload:
2982           test = hkr == constants.HKR_FAIL
2983           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2984                         "Script %s failed, output:", script)
2985           if test:
2986             output = self._HOOKS_INDENT_RE.sub("      ", output)
2987             feedback_fn("%s" % output)
2988             lu_result = 0
2989
2990     return lu_result
2991
2992
2993 class LUClusterVerifyDisks(NoHooksLU):
2994   """Verifies the cluster disks status.
2995
2996   """
2997   REQ_BGL = False
2998
2999   def ExpandNames(self):
3000     self.share_locks = _ShareAll()
3001     self.needed_locks = {
3002       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3003       }
3004
3005   def Exec(self, feedback_fn):
3006     group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3007
3008     # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3009     return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3010                            for group in group_names])
3011
3012
3013 class LUGroupVerifyDisks(NoHooksLU):
3014   """Verifies the status of all disks in a node group.
3015
3016   """
3017   REQ_BGL = False
3018
3019   def ExpandNames(self):
3020     # Raises errors.OpPrereqError on its own if group can't be found
3021     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3022
3023     self.share_locks = _ShareAll()
3024     self.needed_locks = {
3025       locking.LEVEL_INSTANCE: [],
3026       locking.LEVEL_NODEGROUP: [],
3027       locking.LEVEL_NODE: [],
3028       }
3029
3030   def DeclareLocks(self, level):
3031     if level == locking.LEVEL_INSTANCE:
3032       assert not self.needed_locks[locking.LEVEL_INSTANCE]
3033
3034       # Lock instances optimistically, needs verification once node and group
3035       # locks have been acquired
3036       self.needed_locks[locking.LEVEL_INSTANCE] = \
3037         self.cfg.GetNodeGroupInstances(self.group_uuid)
3038
3039     elif level == locking.LEVEL_NODEGROUP:
3040       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3041
3042       self.needed_locks[locking.LEVEL_NODEGROUP] = \
3043         set([self.group_uuid] +
3044             # Lock all groups used by instances optimistically; this requires
3045             # going via the node before it's locked, requiring verification
3046             # later on
3047             [group_uuid
3048              for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3049              for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3050
3051     elif level == locking.LEVEL_NODE:
3052       # This will only lock the nodes in the group to be verified which contain
3053       # actual instances
3054       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3055       self._LockInstancesNodes()
3056
3057       # Lock all nodes in group to be verified
3058       assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3059       member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3060       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3061
3062   def CheckPrereq(self):
3063     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3064     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3065     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3066
3067     assert self.group_uuid in owned_groups
3068
3069     # Check if locked instances are still correct
3070     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3071
3072     # Get instance information
3073     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3074
3075     # Check if node groups for locked instances are still correct
3076     for (instance_name, inst) in self.instances.items():
3077       assert owned_nodes.issuperset(inst.all_nodes), \
3078         "Instance %s's nodes changed while we kept the lock" % instance_name
3079
3080       inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3081                                              owned_groups)
3082
3083       assert self.group_uuid in inst_groups, \
3084         "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3085
3086   def Exec(self, feedback_fn):
3087     """Verify integrity of cluster disks.
3088
3089     @rtype: tuple of three items
3090     @return: a tuple of (dict of node-to-node_error, list of instances
3091         which need activate-disks, dict of instance: (node, volume) for
3092         missing volumes
3093
3094     """
3095     res_nodes = {}
3096     res_instances = set()
3097     res_missing = {}
3098
3099     nv_dict = _MapInstanceDisksToNodes([inst
3100                                         for inst in self.instances.values()
3101                                         if inst.admin_up])
3102
3103     if nv_dict:
3104       nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3105                              set(self.cfg.GetVmCapableNodeList()))
3106
3107       node_lvs = self.rpc.call_lv_list(nodes, [])
3108
3109       for (node, node_res) in node_lvs.items():
3110         if node_res.offline:
3111           continue
3112
3113         msg = node_res.fail_msg
3114         if msg:
3115           logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3116           res_nodes[node] = msg
3117           continue
3118
3119         for lv_name, (_, _, lv_online) in node_res.payload.items():
3120           inst = nv_dict.pop((node, lv_name), None)
3121           if not (lv_online or inst is None):
3122             res_instances.add(inst)
3123
3124       # any leftover items in nv_dict are missing LVs, let's arrange the data
3125       # better
3126       for key, inst in nv_dict.iteritems():
3127         res_missing.setdefault(inst, []).append(key)
3128
3129     return (res_nodes, list(res_instances), res_missing)
3130
3131
3132 class LUClusterRepairDiskSizes(NoHooksLU):
3133   """Verifies the cluster disks sizes.
3134
3135   """
3136   REQ_BGL = False
3137
3138   def ExpandNames(self):
3139     if self.op.instances:
3140       self.wanted_names = _GetWantedInstances(self, self.op.instances)
3141       self.needed_locks = {
3142         locking.LEVEL_NODE: [],
3143         locking.LEVEL_INSTANCE: self.wanted_names,
3144         }
3145       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3146     else:
3147       self.wanted_names = None
3148       self.needed_locks = {
3149         locking.LEVEL_NODE: locking.ALL_SET,
3150         locking.LEVEL_INSTANCE: locking.ALL_SET,
3151         }
3152     self.share_locks = _ShareAll()
3153
3154   def DeclareLocks(self, level):
3155     if level == locking.LEVEL_NODE and self.wanted_names is not None:
3156       self._LockInstancesNodes(primary_only=True)
3157
3158   def CheckPrereq(self):
3159     """Check prerequisites.
3160
3161     This only checks the optional instance list against the existing names.
3162
3163     """
3164     if self.wanted_names is None:
3165       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3166
3167     self.wanted_instances = \
3168         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3169
3170   def _EnsureChildSizes(self, disk):
3171     """Ensure children of the disk have the needed disk size.
3172
3173     This is valid mainly for DRBD8 and fixes an issue where the
3174     children have smaller disk size.
3175
3176     @param disk: an L{ganeti.objects.Disk} object
3177
3178     """
3179     if disk.dev_type == constants.LD_DRBD8:
3180       assert disk.children, "Empty children for DRBD8?"
3181       fchild = disk.children[0]
3182       mismatch = fchild.size < disk.size
3183       if mismatch:
3184         self.LogInfo("Child disk has size %d, parent %d, fixing",
3185                      fchild.size, disk.size)
3186         fchild.size = disk.size
3187
3188       # and we recurse on this child only, not on the metadev
3189       return self._EnsureChildSizes(fchild) or mismatch
3190     else:
3191       return False
3192
3193   def Exec(self, feedback_fn):
3194     """Verify the size of cluster disks.
3195
3196     """
3197     # TODO: check child disks too
3198     # TODO: check differences in size between primary/secondary nodes
3199     per_node_disks = {}
3200     for instance in self.wanted_instances:
3201       pnode = instance.primary_node
3202       if pnode not in per_node_disks:
3203         per_node_disks[pnode] = []
3204       for idx, disk in enumerate(instance.disks):
3205         per_node_disks[pnode].append((instance, idx, disk))
3206
3207     changed = []
3208     for node, dskl in per_node_disks.items():
3209       newl = [v[2].Copy() for v in dskl]
3210       for dsk in newl:
3211         self.cfg.SetDiskID(dsk, node)
3212       result = self.rpc.call_blockdev_getsize(node, newl)
3213       if result.fail_msg:
3214         self.LogWarning("Failure in blockdev_getsize call to node"
3215                         " %s, ignoring", node)
3216         continue
3217       if len(result.payload) != len(dskl):
3218         logging.warning("Invalid result from node %s: len(dksl)=%d,"
3219                         " result.payload=%s", node, len(dskl), result.payload)
3220         self.LogWarning("Invalid result from node %s, ignoring node results",
3221                         node)
3222         continue
3223       for ((instance, idx, disk), size) in zip(dskl, result.payload):
3224         if size is None:
3225           self.LogWarning("Disk %d of instance %s did not return size"
3226                           " information, ignoring", idx, instance.name)
3227           continue
3228         if not isinstance(size, (int, long)):
3229           self.LogWarning("Disk %d of instance %s did not return valid"
3230                           " size information, ignoring", idx, instance.name)
3231           continue
3232         size = size >> 20
3233         if size != disk.size:
3234           self.LogInfo("Disk %d of instance %s has mismatched size,"
3235                        " correcting: recorded %d, actual %d", idx,
3236                        instance.name, disk.size, size)
3237           disk.size = size
3238           self.cfg.Update(instance, feedback_fn)
3239           changed.append((instance.name, idx, size))
3240         if self._EnsureChildSizes(disk):
3241           self.cfg.Update(instance, feedback_fn)
3242           changed.append((instance.name, idx, disk.size))
3243     return changed
3244
3245
3246 class LUClusterRename(LogicalUnit):
3247   """Rename the cluster.
3248
3249   """
3250   HPATH = "cluster-rename"
3251   HTYPE = constants.HTYPE_CLUSTER
3252
3253   def BuildHooksEnv(self):
3254     """Build hooks env.
3255
3256     """
3257     return {
3258       "OP_TARGET": self.cfg.GetClusterName(),
3259       "NEW_NAME": self.op.name,
3260       }
3261
3262   def BuildHooksNodes(self):
3263     """Build hooks nodes.
3264
3265     """
3266     return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3267
3268   def CheckPrereq(self):
3269     """Verify that the passed name is a valid one.
3270
3271     """
3272     hostname = netutils.GetHostname(name=self.op.name,
3273                                     family=self.cfg.GetPrimaryIPFamily())
3274
3275     new_name = hostname.name
3276     self.ip = new_ip = hostname.ip
3277     old_name = self.cfg.GetClusterName()
3278     old_ip = self.cfg.GetMasterIP()
3279     if new_name == old_name and new_ip == old_ip:
3280       raise errors.OpPrereqError("Neither the name nor the IP address of the"
3281                                  " cluster has changed",
3282                                  errors.ECODE_INVAL)
3283     if new_ip != old_ip:
3284       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3285         raise errors.OpPrereqError("The given cluster IP address (%s) is"
3286                                    " reachable on the network" %
3287                                    new_ip, errors.ECODE_NOTUNIQUE)
3288
3289     self.op.name = new_name
3290
3291   def Exec(self, feedback_fn):
3292     """Rename the cluster.
3293
3294     """
3295     clustername = self.op.name
3296     ip = self.ip
3297
3298     # shutdown the master IP
3299     master = self.cfg.GetMasterNode()
3300     result = self.rpc.call_node_stop_master(master, False)
3301     result.Raise("Could not disable the master role")
3302
3303     try:
3304       cluster = self.cfg.GetClusterInfo()
3305       cluster.cluster_name = clustername
3306       cluster.master_ip = ip
3307       self.cfg.Update(cluster, feedback_fn)
3308
3309       # update the known hosts file
3310       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3311       node_list = self.cfg.GetOnlineNodeList()
3312       try:
3313         node_list.remove(master)
3314       except ValueError:
3315         pass
3316       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3317     finally:
3318       result = self.rpc.call_node_start_master(master, False, False)
3319       msg = result.fail_msg
3320       if msg:
3321         self.LogWarning("Could not re-enable the master role on"
3322                         " the master, please restart manually: %s", msg)
3323
3324     return clustername
3325
3326
3327 class LUClusterSetParams(LogicalUnit):
3328   """Change the parameters of the cluster.
3329
3330   """
3331   HPATH = "cluster-modify"
3332   HTYPE = constants.HTYPE_CLUSTER
3333   REQ_BGL = False
3334
3335   def CheckArguments(self):
3336     """Check parameters
3337
3338     """
3339     if self.op.uid_pool:
3340       uidpool.CheckUidPool(self.op.uid_pool)
3341
3342     if self.op.add_uids:
3343       uidpool.CheckUidPool(self.op.add_uids)
3344
3345     if self.op.remove_uids:
3346       uidpool.CheckUidPool(self.op.remove_uids)
3347
3348   def ExpandNames(self):
3349     # FIXME: in the future maybe other cluster params won't require checking on
3350     # all nodes to be modified.
3351     self.needed_locks = {
3352       locking.LEVEL_NODE: locking.ALL_SET,
3353     }
3354     self.share_locks[locking.LEVEL_NODE] = 1
3355
3356   def BuildHooksEnv(self):
3357     """Build hooks env.
3358
3359     """
3360     return {
3361       "OP_TARGET": self.cfg.GetClusterName(),
3362       "NEW_VG_NAME": self.op.vg_name,
3363       }
3364
3365   def BuildHooksNodes(self):
3366     """Build hooks nodes.
3367
3368     """
3369     mn = self.cfg.GetMasterNode()
3370     return ([mn], [mn])
3371
3372   def CheckPrereq(self):
3373     """Check prerequisites.
3374
3375     This checks whether the given params don't conflict and
3376     if the given volume group is valid.
3377
3378     """
3379     if self.op.vg_name is not None and not self.op.vg_name:
3380       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3381         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3382                                    " instances exist", errors.ECODE_INVAL)
3383
3384     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3385       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3386         raise errors.OpPrereqError("Cannot disable drbd helper while"
3387                                    " drbd-based instances exist",
3388                                    errors.ECODE_INVAL)
3389
3390     node_list = self.owned_locks(locking.LEVEL_NODE)
3391
3392     # if vg_name not None, checks given volume group on all nodes
3393     if self.op.vg_name:
3394       vglist = self.rpc.call_vg_list(node_list)
3395       for node in node_list:
3396         msg = vglist[node].fail_msg
3397         if msg:
3398           # ignoring down node
3399           self.LogWarning("Error while gathering data on node %s"
3400                           " (ignoring node): %s", node, msg)
3401           continue
3402         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3403                                               self.op.vg_name,
3404                                               constants.MIN_VG_SIZE)
3405         if vgstatus:
3406           raise errors.OpPrereqError("Error on node '%s': %s" %
3407                                      (node, vgstatus), errors.ECODE_ENVIRON)
3408
3409     if self.op.drbd_helper:
3410       # checks given drbd helper on all nodes
3411       helpers = self.rpc.call_drbd_helper(node_list)
3412       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3413         if ninfo.offline:
3414           self.LogInfo("Not checking drbd helper on offline node %s", node)
3415           continue
3416         msg = helpers[node].fail_msg
3417         if msg:
3418           raise errors.OpPrereqError("Error checking drbd helper on node"
3419                                      " '%s': %s" % (node, msg),
3420                                      errors.ECODE_ENVIRON)
3421         node_helper = helpers[node].payload
3422         if node_helper != self.op.drbd_helper:
3423           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3424                                      (node, node_helper), errors.ECODE_ENVIRON)
3425
3426     self.cluster = cluster = self.cfg.GetClusterInfo()
3427     # validate params changes
3428     if self.op.beparams:
3429       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3430       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3431
3432     if self.op.ndparams:
3433       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3434       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3435
3436       # TODO: we need a more general way to handle resetting
3437       # cluster-level parameters to default values
3438       if self.new_ndparams["oob_program"] == "":
3439         self.new_ndparams["oob_program"] = \
3440             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3441
3442     if self.op.nicparams:
3443       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3444       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3445       objects.NIC.CheckParameterSyntax(self.new_nicparams)
3446       nic_errors = []
3447
3448       # check all instances for consistency
3449       for instance in self.cfg.GetAllInstancesInfo().values():
3450         for nic_idx, nic in enumerate(instance.nics):
3451           params_copy = copy.deepcopy(nic.nicparams)
3452           params_filled = objects.FillDict(self.new_nicparams, params_copy)
3453
3454           # check parameter syntax
3455           try:
3456             objects.NIC.CheckParameterSyntax(params_filled)
3457           except errors.ConfigurationError, err:
3458             nic_errors.append("Instance %s, nic/%d: %s" %
3459                               (instance.name, nic_idx, err))
3460
3461           # if we're moving instances to routed, check that they have an ip
3462           target_mode = params_filled[constants.NIC_MODE]
3463           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3464             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3465                               " address" % (instance.name, nic_idx))
3466       if nic_errors:
3467         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3468                                    "\n".join(nic_errors))
3469
3470     # hypervisor list/parameters
3471     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3472     if self.op.hvparams:
3473       for hv_name, hv_dict in self.op.hvparams.items():
3474         if hv_name not in self.new_hvparams:
3475           self.new_hvparams[hv_name] = hv_dict
3476         else:
3477           self.new_hvparams[hv_name].update(hv_dict)
3478
3479     # os hypervisor parameters
3480     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3481     if self.op.os_hvp:
3482       for os_name, hvs in self.op.os_hvp.items():
3483         if os_name not in self.new_os_hvp:
3484           self.new_os_hvp[os_name] = hvs
3485         else:
3486           for hv_name, hv_dict in hvs.items():
3487             if hv_name not in self.new_os_hvp[os_name]:
3488               self.new_os_hvp[os_name][hv_name] = hv_dict
3489             else:
3490               self.new_os_hvp[os_name][hv_name].update(hv_dict)
3491
3492     # os parameters
3493     self.new_osp = objects.FillDict(cluster.osparams, {})
3494     if self.op.osparams:
3495       for os_name, osp in self.op.osparams.items():
3496         if os_name not in self.new_osp:
3497           self.new_osp[os_name] = {}
3498
3499         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3500                                                   use_none=True)
3501
3502         if not self.new_osp[os_name]:
3503           # we removed all parameters
3504           del self.new_osp[os_name]
3505         else:
3506           # check the parameter validity (remote check)
3507           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3508                          os_name, self.new_osp[os_name])
3509
3510     # changes to the hypervisor list
3511     if self.op.enabled_hypervisors is not None:
3512       self.hv_list = self.op.enabled_hypervisors
3513       for hv in self.hv_list:
3514         # if the hypervisor doesn't already exist in the cluster
3515         # hvparams, we initialize it to empty, and then (in both
3516         # cases) we make sure to fill the defaults, as we might not
3517         # have a complete defaults list if the hypervisor wasn't
3518         # enabled before
3519         if hv not in new_hvp:
3520           new_hvp[hv] = {}
3521         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3522         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3523     else:
3524       self.hv_list = cluster.enabled_hypervisors
3525
3526     if self.op.hvparams or self.op.enabled_hypervisors is not None:
3527       # either the enabled list has changed, or the parameters have, validate
3528       for hv_name, hv_params in self.new_hvparams.items():
3529         if ((self.op.hvparams and hv_name in self.op.hvparams) or
3530             (self.op.enabled_hypervisors and
3531              hv_name in self.op.enabled_hypervisors)):
3532           # either this is a new hypervisor, or its parameters have changed
3533           hv_class = hypervisor.GetHypervisor(hv_name)
3534           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3535           hv_class.CheckParameterSyntax(hv_params)
3536           _CheckHVParams(self, node_list, hv_name, hv_params)
3537
3538     if self.op.os_hvp:
3539       # no need to check any newly-enabled hypervisors, since the
3540       # defaults have already been checked in the above code-block
3541       for os_name, os_hvp in self.new_os_hvp.items():
3542         for hv_name, hv_params in os_hvp.items():
3543           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3544           # we need to fill in the new os_hvp on top of the actual hv_p
3545           cluster_defaults = self.new_hvparams.get(hv_name, {})
3546           new_osp = objects.FillDict(cluster_defaults, hv_params)
3547           hv_class = hypervisor.GetHypervisor(hv_name)
3548           hv_class.CheckParameterSyntax(new_osp)
3549           _CheckHVParams(self, node_list, hv_name, new_osp)
3550
3551     if self.op.default_iallocator:
3552       alloc_script = utils.FindFile(self.op.default_iallocator,
3553                                     constants.IALLOCATOR_SEARCH_PATH,
3554                                     os.path.isfile)
3555       if alloc_script is None:
3556         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3557                                    " specified" % self.op.default_iallocator,
3558                                    errors.ECODE_INVAL)
3559
3560   def Exec(self, feedback_fn):
3561     """Change the parameters of the cluster.
3562
3563     """
3564     if self.op.vg_name is not None:
3565       new_volume = self.op.vg_name
3566       if not new_volume:
3567         new_volume = None
3568       if new_volume != self.cfg.GetVGName():
3569         self.cfg.SetVGName(new_volume)
3570       else:
3571         feedback_fn("Cluster LVM configuration already in desired"
3572                     " state, not changing")
3573     if self.op.drbd_helper is not None:
3574       new_helper = self.op.drbd_helper
3575       if not new_helper:
3576         new_helper = None
3577       if new_helper != self.cfg.GetDRBDHelper():
3578         self.cfg.SetDRBDHelper(new_helper)
3579       else:
3580         feedback_fn("Cluster DRBD helper already in desired state,"
3581                     " not changing")
3582     if self.op.hvparams:
3583       self.cluster.hvparams = self.new_hvparams
3584     if self.op.os_hvp:
3585       self.cluster.os_hvp = self.new_os_hvp
3586     if self.op.enabled_hypervisors is not None:
3587       self.cluster.hvparams = self.new_hvparams
3588       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3589     if self.op.beparams:
3590       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3591     if self.op.nicparams:
3592       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3593     if self.op.osparams:
3594       self.cluster.osparams = self.new_osp
3595     if self.op.ndparams:
3596       self.cluster.ndparams = self.new_ndparams
3597
3598     if self.op.candidate_pool_size is not None:
3599       self.cluster.candidate_pool_size = self.op.candidate_pool_size
3600       # we need to update the pool size here, otherwise the save will fail
3601       _AdjustCandidatePool(self, [])
3602
3603     if self.op.maintain_node_health is not None:
3604       self.cluster.maintain_node_health = self.op.maintain_node_health
3605
3606     if self.op.prealloc_wipe_disks is not None:
3607       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3608
3609     if self.op.add_uids is not None:
3610       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3611
3612     if self.op.remove_uids is not None:
3613       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3614
3615     if self.op.uid_pool is not None:
3616       self.cluster.uid_pool = self.op.uid_pool
3617
3618     if self.op.default_iallocator is not None:
3619       self.cluster.default_iallocator = self.op.default_iallocator
3620
3621     if self.op.reserved_lvs is not None:
3622       self.cluster.reserved_lvs = self.op.reserved_lvs
3623
3624     def helper_os(aname, mods, desc):
3625       desc += " OS list"
3626       lst = getattr(self.cluster, aname)
3627       for key, val in mods:
3628         if key == constants.DDM_ADD:
3629           if val in lst:
3630             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3631           else:
3632             lst.append(val)
3633         elif key == constants.DDM_REMOVE:
3634           if val in lst:
3635             lst.remove(val)
3636           else:
3637             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3638         else:
3639           raise errors.ProgrammerError("Invalid modification '%s'" % key)
3640
3641     if self.op.hidden_os:
3642       helper_os("hidden_os", self.op.hidden_os, "hidden")
3643
3644     if self.op.blacklisted_os:
3645       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3646
3647     if self.op.master_netdev:
3648       master = self.cfg.GetMasterNode()
3649       feedback_fn("Shutting down master ip on the current netdev (%s)" %
3650                   self.cluster.master_netdev)
3651       result = self.rpc.call_node_stop_master(master, False)
3652       result.Raise("Could not disable the master ip")
3653       feedback_fn("Changing master_netdev from %s to %s" %
3654                   (self.cluster.master_netdev, self.op.master_netdev))
3655       self.cluster.master_netdev = self.op.master_netdev
3656
3657     self.cfg.Update(self.cluster, feedback_fn)
3658
3659     if self.op.master_netdev:
3660       feedback_fn("Starting the master ip on the new master netdev (%s)" %
3661                   self.op.master_netdev)
3662       result = self.rpc.call_node_start_master(master, False, False)
3663       if result.fail_msg:
3664         self.LogWarning("Could not re-enable the master ip on"
3665                         " the master, please restart manually: %s",
3666                         result.fail_msg)
3667
3668
3669 def _UploadHelper(lu, nodes, fname):
3670   """Helper for uploading a file and showing warnings.
3671
3672   """
3673   if os.path.exists(fname):
3674     result = lu.rpc.call_upload_file(nodes, fname)
3675     for to_node, to_result in result.items():
3676       msg = to_result.fail_msg
3677       if msg:
3678         msg = ("Copy of file %s to node %s failed: %s" %
3679                (fname, to_node, msg))
3680         lu.proc.LogWarning(msg)
3681
3682
3683 def _ComputeAncillaryFiles(cluster, redist):
3684   """Compute files external to Ganeti which need to be consistent.
3685
3686   @type redist: boolean
3687   @param redist: Whether to include files which need to be redistributed
3688
3689   """
3690   # Compute files for all nodes
3691   files_all = set([
3692     constants.SSH_KNOWN_HOSTS_FILE,
3693     constants.CONFD_HMAC_KEY,
3694     constants.CLUSTER_DOMAIN_SECRET_FILE,
3695     ])
3696
3697   if not redist:
3698     files_all.update(constants.ALL_CERT_FILES)
3699     files_all.update(ssconf.SimpleStore().GetFileList())
3700
3701   if cluster.modify_etc_hosts:
3702     files_all.add(constants.ETC_HOSTS)
3703
3704   # Files which must either exist on all nodes or on none
3705   files_all_opt = set([
3706     constants.RAPI_USERS_FILE,
3707     ])
3708
3709   # Files which should only be on master candidates
3710   files_mc = set()
3711   if not redist:
3712     files_mc.add(constants.CLUSTER_CONF_FILE)
3713
3714   # Files which should only be on VM-capable nodes
3715   files_vm = set(filename
3716     for hv_name in cluster.enabled_hypervisors
3717     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3718
3719   # Filenames must be unique
3720   assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3721           sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3722          "Found file listed in more than one file list"
3723
3724   return (files_all, files_all_opt, files_mc, files_vm)
3725
3726
3727 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3728   """Distribute additional files which are part of the cluster configuration.
3729
3730   ConfigWriter takes care of distributing the config and ssconf files, but
3731   there are more files which should be distributed to all nodes. This function
3732   makes sure those are copied.
3733
3734   @param lu: calling logical unit
3735   @param additional_nodes: list of nodes not in the config to distribute to
3736   @type additional_vm: boolean
3737   @param additional_vm: whether the additional nodes are vm-capable or not
3738
3739   """
3740   # Gather target nodes
3741   cluster = lu.cfg.GetClusterInfo()
3742   master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3743
3744   online_nodes = lu.cfg.GetOnlineNodeList()
3745   vm_nodes = lu.cfg.GetVmCapableNodeList()
3746
3747   if additional_nodes is not None:
3748     online_nodes.extend(additional_nodes)
3749     if additional_vm:
3750       vm_nodes.extend(additional_nodes)
3751
3752   # Never distribute to master node
3753   for nodelist in [online_nodes, vm_nodes]:
3754     if master_info.name in nodelist:
3755       nodelist.remove(master_info.name)
3756
3757   # Gather file lists
3758   (files_all, files_all_opt, files_mc, files_vm) = \
3759     _ComputeAncillaryFiles(cluster, True)
3760
3761   # Never re-distribute configuration file from here
3762   assert not (constants.CLUSTER_CONF_FILE in files_all or
3763               constants.CLUSTER_CONF_FILE in files_vm)
3764   assert not files_mc, "Master candidates not handled in this function"
3765
3766   filemap = [
3767     (online_nodes, files_all),
3768     (online_nodes, files_all_opt),
3769     (vm_nodes, files_vm),
3770     ]
3771
3772   # Upload the files
3773   for (node_list, files) in filemap:
3774     for fname in files:
3775       _UploadHelper(lu, node_list, fname)
3776
3777
3778 class LUClusterRedistConf(NoHooksLU):
3779   """Force the redistribution of cluster configuration.
3780
3781   This is a very simple LU.
3782
3783   """
3784   REQ_BGL = False
3785
3786   def ExpandNames(self):
3787     self.needed_locks = {
3788       locking.LEVEL_NODE: locking.ALL_SET,
3789     }
3790     self.share_locks[locking.LEVEL_NODE] = 1
3791
3792   def Exec(self, feedback_fn):
3793     """Redistribute the configuration.
3794
3795     """
3796     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3797     _RedistributeAncillaryFiles(self)
3798
3799
3800 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3801   """Sleep and poll for an instance's disk to sync.
3802
3803   """
3804   if not instance.disks or disks is not None and not disks:
3805     return True
3806
3807   disks = _ExpandCheckDisks(instance, disks)
3808
3809   if not oneshot:
3810     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3811
3812   node = instance.primary_node
3813
3814   for dev in disks:
3815     lu.cfg.SetDiskID(dev, node)
3816
3817   # TODO: Convert to utils.Retry
3818
3819   retries = 0
3820   degr_retries = 10 # in seconds, as we sleep 1 second each time
3821   while True:
3822     max_time = 0
3823     done = True
3824     cumul_degraded = False
3825     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3826     msg = rstats.fail_msg
3827     if msg:
3828       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3829       retries += 1
3830       if retries >= 10:
3831         raise errors.RemoteError("Can't contact node %s for mirror data,"
3832                                  " aborting." % node)
3833       time.sleep(6)
3834       continue
3835     rstats = rstats.payload
3836     retries = 0
3837     for i, mstat in enumerate(rstats):
3838       if mstat is None:
3839         lu.LogWarning("Can't compute data for node %s/%s",
3840                            node, disks[i].iv_name)
3841         continue
3842
3843       cumul_degraded = (cumul_degraded or
3844                         (mstat.is_degraded and mstat.sync_percent is None))
3845       if mstat.sync_percent is not None:
3846         done = False
3847         if mstat.estimated_time is not None:
3848           rem_time = ("%s remaining (estimated)" %
3849                       utils.FormatSeconds(mstat.estimated_time))
3850           max_time = mstat.estimated_time
3851         else:
3852           rem_time = "no time estimate"
3853         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3854                         (disks[i].iv_name, mstat.sync_percent, rem_time))
3855
3856     # if we're done but degraded, let's do a few small retries, to
3857     # make sure we see a stable and not transient situation; therefore
3858     # we force restart of the loop
3859     if (done or oneshot) and cumul_degraded and degr_retries > 0:
3860       logging.info("Degraded disks found, %d retries left", degr_retries)
3861       degr_retries -= 1
3862       time.sleep(1)
3863       continue
3864
3865     if done or oneshot:
3866       break
3867
3868     time.sleep(min(60, max_time))
3869
3870   if done:
3871     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3872   return not cumul_degraded
3873
3874
3875 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3876   """Check that mirrors are not degraded.
3877
3878   The ldisk parameter, if True, will change the test from the
3879   is_degraded attribute (which represents overall non-ok status for
3880   the device(s)) to the ldisk (representing the local storage status).
3881
3882   """
3883   lu.cfg.SetDiskID(dev, node)
3884
3885   result = True
3886
3887   if on_primary or dev.AssembleOnSecondary():
3888     rstats = lu.rpc.call_blockdev_find(node, dev)
3889     msg = rstats.fail_msg
3890     if msg:
3891       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3892       result = False
3893     elif not rstats.payload:
3894       lu.LogWarning("Can't find disk on node %s", node)
3895       result = False
3896     else:
3897       if ldisk:
3898         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3899       else:
3900         result = result and not rstats.payload.is_degraded
3901
3902   if dev.children:
3903     for child in dev.children:
3904       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3905
3906   return result
3907
3908
3909 class LUOobCommand(NoHooksLU):
3910   """Logical unit for OOB handling.
3911
3912   """
3913   REG_BGL = False
3914   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3915
3916   def ExpandNames(self):
3917     """Gather locks we need.
3918
3919     """
3920     if self.op.node_names:
3921       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3922       lock_names = self.op.node_names
3923     else:
3924       lock_names = locking.ALL_SET
3925
3926     self.needed_locks = {
3927       locking.LEVEL_NODE: lock_names,
3928       }
3929
3930   def CheckPrereq(self):
3931     """Check prerequisites.
3932
3933     This checks:
3934      - the node exists in the configuration
3935      - OOB is supported
3936
3937     Any errors are signaled by raising errors.OpPrereqError.
3938
3939     """
3940     self.nodes = []
3941     self.master_node = self.cfg.GetMasterNode()
3942
3943     assert self.op.power_delay >= 0.0
3944
3945     if self.op.node_names:
3946       if (self.op.command in self._SKIP_MASTER and
3947           self.master_node in self.op.node_names):
3948         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3949         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3950
3951         if master_oob_handler:
3952           additional_text = ("run '%s %s %s' if you want to operate on the"
3953                              " master regardless") % (master_oob_handler,
3954                                                       self.op.command,
3955                                                       self.master_node)
3956         else:
3957           additional_text = "it does not support out-of-band operations"
3958
3959         raise errors.OpPrereqError(("Operating on the master node %s is not"
3960                                     " allowed for %s; %s") %
3961                                    (self.master_node, self.op.command,
3962                                     additional_text), errors.ECODE_INVAL)
3963     else:
3964       self.op.node_names = self.cfg.GetNodeList()
3965       if self.op.command in self._SKIP_MASTER:
3966         self.op.node_names.remove(self.master_node)
3967
3968     if self.op.command in self._SKIP_MASTER:
3969       assert self.master_node not in self.op.node_names
3970
3971     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
3972       if node is None:
3973         raise errors.OpPrereqError("Node %s not found" % node_name,
3974                                    errors.ECODE_NOENT)
3975       else:
3976         self.nodes.append(node)
3977
3978       if (not self.op.ignore_status and
3979           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3980         raise errors.OpPrereqError(("Cannot power off node %s because it is"
3981                                     " not marked offline") % node_name,
3982                                    errors.ECODE_STATE)
3983
3984   def Exec(self, feedback_fn):
3985     """Execute OOB and return result if we expect any.
3986
3987     """
3988     master_node = self.master_node
3989     ret = []
3990
3991     for idx, node in enumerate(utils.NiceSort(self.nodes,
3992                                               key=lambda node: node.name)):
3993       node_entry = [(constants.RS_NORMAL, node.name)]
3994       ret.append(node_entry)
3995
3996       oob_program = _SupportsOob(self.cfg, node)
3997
3998       if not oob_program:
3999         node_entry.append((constants.RS_UNAVAIL, None))
4000         continue
4001
4002       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4003                    self.op.command, oob_program, node.name)
4004       result = self.rpc.call_run_oob(master_node, oob_program,
4005                                      self.op.command, node.name,
4006                                      self.op.timeout)
4007
4008       if result.fail_msg:
4009         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4010                         node.name, result.fail_msg)
4011         node_entry.append((constants.RS_NODATA, None))
4012       else:
4013         try:
4014           self._CheckPayload(result)
4015         except errors.OpExecError, err:
4016           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4017                           node.name, err)
4018           node_entry.append((constants.RS_NODATA, None))
4019         else:
4020           if self.op.command == constants.OOB_HEALTH:
4021             # For health we should log important events
4022             for item, status in result.payload:
4023               if status in [constants.OOB_STATUS_WARNING,
4024                             constants.OOB_STATUS_CRITICAL]:
4025                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4026                                 item, node.name, status)
4027
4028           if self.op.command == constants.OOB_POWER_ON:
4029             node.powered = True
4030           elif self.op.command == constants.OOB_POWER_OFF:
4031             node.powered = False
4032           elif self.op.command == constants.OOB_POWER_STATUS:
4033             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4034             if powered != node.powered:
4035               logging.warning(("Recorded power state (%s) of node '%s' does not"
4036                                " match actual power state (%s)"), node.powered,
4037                               node.name, powered)
4038
4039           # For configuration changing commands we should update the node
4040           if self.op.command in (constants.OOB_POWER_ON,
4041                                  constants.OOB_POWER_OFF):
4042             self.cfg.Update(node, feedback_fn)
4043
4044           node_entry.append((constants.RS_NORMAL, result.payload))
4045
4046           if (self.op.command == constants.OOB_POWER_ON and
4047               idx < len(self.nodes) - 1):
4048             time.sleep(self.op.power_delay)
4049
4050     return ret
4051
4052   def _CheckPayload(self, result):
4053     """Checks if the payload is valid.
4054
4055     @param result: RPC result
4056     @raises errors.OpExecError: If payload is not valid
4057
4058     """
4059     errs = []
4060     if self.op.command == constants.OOB_HEALTH:
4061       if not isinstance(result.payload, list):
4062         errs.append("command 'health' is expected to return a list but got %s" %
4063                     type(result.payload))
4064       else:
4065         for item, status in result.payload:
4066           if status not in constants.OOB_STATUSES:
4067             errs.append("health item '%s' has invalid status '%s'" %
4068                         (item, status))
4069
4070     if self.op.command == constants.OOB_POWER_STATUS:
4071       if not isinstance(result.payload, dict):
4072         errs.append("power-status is expected to return a dict but got %s" %
4073                     type(result.payload))
4074
4075     if self.op.command in [
4076         constants.OOB_POWER_ON,
4077         constants.OOB_POWER_OFF,
4078         constants.OOB_POWER_CYCLE,
4079         ]:
4080       if result.payload is not None:
4081         errs.append("%s is expected to not return payload but got '%s'" %
4082                     (self.op.command, result.payload))
4083
4084     if errs:
4085       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4086                                utils.CommaJoin(errs))
4087
4088
4089 class _OsQuery(_QueryBase):
4090   FIELDS = query.OS_FIELDS
4091
4092   def ExpandNames(self, lu):
4093     # Lock all nodes in shared mode
4094     # Temporary removal of locks, should be reverted later
4095     # TODO: reintroduce locks when they are lighter-weight
4096     lu.needed_locks = {}
4097     #self.share_locks[locking.LEVEL_NODE] = 1
4098     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4099
4100     # The following variables interact with _QueryBase._GetNames
4101     if self.names:
4102       self.wanted = self.names
4103     else:
4104       self.wanted = locking.ALL_SET
4105
4106     self.do_locking = self.use_locking
4107
4108   def DeclareLocks(self, lu, level):
4109     pass
4110
4111   @staticmethod
4112   def _DiagnoseByOS(rlist):
4113     """Remaps a per-node return list into an a per-os per-node dictionary
4114
4115     @param rlist: a map with node names as keys and OS objects as values
4116
4117     @rtype: dict
4118     @return: a dictionary with osnames as keys and as value another
4119         map, with nodes as keys and tuples of (path, status, diagnose,
4120         variants, parameters, api_versions) as values, eg::
4121
4122           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4123                                      (/srv/..., False, "invalid api")],
4124                            "node2": [(/srv/..., True, "", [], [])]}
4125           }
4126
4127     """
4128     all_os = {}
4129     # we build here the list of nodes that didn't fail the RPC (at RPC
4130     # level), so that nodes with a non-responding node daemon don't
4131     # make all OSes invalid
4132     good_nodes = [node_name for node_name in rlist
4133                   if not rlist[node_name].fail_msg]
4134     for node_name, nr in rlist.items():
4135       if nr.fail_msg or not nr.payload:
4136         continue
4137       for (name, path, status, diagnose, variants,
4138            params, api_versions) in nr.payload:
4139         if name not in all_os:
4140           # build a list of nodes for this os containing empty lists
4141           # for each node in node_list
4142           all_os[name] = {}
4143           for nname in good_nodes:
4144             all_os[name][nname] = []
4145         # convert params from [name, help] to (name, help)
4146         params = [tuple(v) for v in params]
4147         all_os[name][node_name].append((path, status, diagnose,
4148                                         variants, params, api_versions))
4149     return all_os
4150
4151   def _GetQueryData(self, lu):
4152     """Computes the list of nodes and their attributes.
4153
4154     """
4155     # Locking is not used
4156     assert not (compat.any(lu.glm.is_owned(level)
4157                            for level in locking.LEVELS
4158                            if level != locking.LEVEL_CLUSTER) or
4159                 self.do_locking or self.use_locking)
4160
4161     valid_nodes = [node.name
4162                    for node in lu.cfg.GetAllNodesInfo().values()
4163                    if not node.offline and node.vm_capable]
4164     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4165     cluster = lu.cfg.GetClusterInfo()
4166
4167     data = {}
4168
4169     for (os_name, os_data) in pol.items():
4170       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4171                           hidden=(os_name in cluster.hidden_os),
4172                           blacklisted=(os_name in cluster.blacklisted_os))
4173
4174       variants = set()
4175       parameters = set()
4176       api_versions = set()
4177
4178       for idx, osl in enumerate(os_data.values()):
4179         info.valid = bool(info.valid and osl and osl[0][1])
4180         if not info.valid:
4181           break
4182
4183         (node_variants, node_params, node_api) = osl[0][3:6]
4184         if idx == 0:
4185           # First entry
4186           variants.update(node_variants)
4187           parameters.update(node_params)
4188           api_versions.update(node_api)
4189         else:
4190           # Filter out inconsistent values
4191           variants.intersection_update(node_variants)
4192           parameters.intersection_update(node_params)
4193           api_versions.intersection_update(node_api)
4194
4195       info.variants = list(variants)
4196       info.parameters = list(parameters)
4197       info.api_versions = list(api_versions)
4198
4199       data[os_name] = info
4200
4201     # Prepare data in requested order
4202     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4203             if name in data]
4204
4205
4206 class LUOsDiagnose(NoHooksLU):
4207   """Logical unit for OS diagnose/query.
4208
4209   """
4210   REQ_BGL = False
4211
4212   @staticmethod
4213   def _BuildFilter(fields, names):
4214     """Builds a filter for querying OSes.
4215
4216     """
4217     name_filter = qlang.MakeSimpleFilter("name", names)
4218
4219     # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4220     # respective field is not requested
4221     status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4222                      for fname in ["hidden", "blacklisted"]
4223                      if fname not in fields]
4224     if "valid" not in fields:
4225       status_filter.append([qlang.OP_TRUE, "valid"])
4226
4227     if status_filter:
4228       status_filter.insert(0, qlang.OP_AND)
4229     else:
4230       status_filter = None
4231
4232     if name_filter and status_filter:
4233       return [qlang.OP_AND, name_filter, status_filter]
4234     elif name_filter:
4235       return name_filter
4236     else:
4237       return status_filter
4238
4239   def CheckArguments(self):
4240     self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4241                        self.op.output_fields, False)
4242
4243   def ExpandNames(self):
4244     self.oq.ExpandNames(self)
4245
4246   def Exec(self, feedback_fn):
4247     return self.oq.OldStyleQuery(self)
4248
4249
4250 class LUNodeRemove(LogicalUnit):
4251   """Logical unit for removing a node.
4252
4253   """
4254   HPATH = "node-remove"
4255   HTYPE = constants.HTYPE_NODE
4256
4257   def BuildHooksEnv(self):
4258     """Build hooks env.
4259
4260     This doesn't run on the target node in the pre phase as a failed
4261     node would then be impossible to remove.
4262
4263     """
4264     return {
4265       "OP_TARGET": self.op.node_name,
4266       "NODE_NAME": self.op.node_name,
4267       }
4268
4269   def BuildHooksNodes(self):
4270     """Build hooks nodes.
4271
4272     """
4273     all_nodes = self.cfg.GetNodeList()
4274     try:
4275       all_nodes.remove(self.op.node_name)
4276     except ValueError:
4277       logging.warning("Node '%s', which is about to be removed, was not found"
4278                       " in the list of all nodes", self.op.node_name)
4279     return (all_nodes, all_nodes)
4280
4281   def CheckPrereq(self):
4282     """Check prerequisites.
4283
4284     This checks:
4285      - the node exists in the configuration
4286      - it does not have primary or secondary instances
4287      - it's not the master
4288
4289     Any errors are signaled by raising errors.OpPrereqError.
4290
4291     """
4292     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4293     node = self.cfg.GetNodeInfo(self.op.node_name)
4294     assert node is not None
4295
4296     masternode = self.cfg.GetMasterNode()
4297     if node.name == masternode:
4298       raise errors.OpPrereqError("Node is the master node, failover to another"
4299                                  " node is required", errors.ECODE_INVAL)
4300
4301     for instance_name, instance in self.cfg.GetAllInstancesInfo():
4302       if node.name in instance.all_nodes:
4303         raise errors.OpPrereqError("Instance %s is still running on the node,"
4304                                    " please remove first" % instance_name,
4305                                    errors.ECODE_INVAL)
4306     self.op.node_name = node.name
4307     self.node = node
4308
4309   def Exec(self, feedback_fn):
4310     """Removes the node from the cluster.
4311
4312     """
4313     node = self.node
4314     logging.info("Stopping the node daemon and removing configs from node %s",
4315                  node.name)
4316
4317     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4318
4319     # Promote nodes to master candidate as needed
4320     _AdjustCandidatePool(self, exceptions=[node.name])
4321     self.context.RemoveNode(node.name)
4322
4323     # Run post hooks on the node before it's removed
4324     _RunPostHook(self, node.name)
4325
4326     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4327     msg = result.fail_msg
4328     if msg:
4329       self.LogWarning("Errors encountered on the remote node while leaving"
4330                       " the cluster: %s", msg)
4331
4332     # Remove node from our /etc/hosts
4333     if self.cfg.GetClusterInfo().modify_etc_hosts:
4334       master_node = self.cfg.GetMasterNode()
4335       result = self.rpc.call_etc_hosts_modify(master_node,
4336                                               constants.ETC_HOSTS_REMOVE,
4337                                               node.name, None)
4338       result.Raise("Can't update hosts file with new host data")
4339       _RedistributeAncillaryFiles(self)
4340
4341
4342 class _NodeQuery(_QueryBase):
4343   FIELDS = query.NODE_FIELDS
4344
4345   def ExpandNames(self, lu):
4346     lu.needed_locks = {}
4347     lu.share_locks = _ShareAll()
4348
4349     if self.names:
4350       self.wanted = _GetWantedNodes(lu, self.names)
4351     else:
4352       self.wanted = locking.ALL_SET
4353
4354     self.do_locking = (self.use_locking and
4355                        query.NQ_LIVE in self.requested_data)
4356
4357     if self.do_locking:
4358       # If any non-static field is requested we need to lock the nodes
4359       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4360
4361   def DeclareLocks(self, lu, level):
4362     pass
4363
4364   def _GetQueryData(self, lu):
4365     """Computes the list of nodes and their attributes.
4366
4367     """
4368     all_info = lu.cfg.GetAllNodesInfo()
4369
4370     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4371
4372     # Gather data as requested
4373     if query.NQ_LIVE in self.requested_data:
4374       # filter out non-vm_capable nodes
4375       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4376
4377       node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4378                                         lu.cfg.GetHypervisorType())
4379       live_data = dict((name, nresult.payload)
4380                        for (name, nresult) in node_data.items()
4381                        if not nresult.fail_msg and nresult.payload)
4382     else:
4383       live_data = None
4384
4385     if query.NQ_INST in self.requested_data:
4386       node_to_primary = dict([(name, set()) for name in nodenames])
4387       node_to_secondary = dict([(name, set()) for name in nodenames])
4388
4389       inst_data = lu.cfg.GetAllInstancesInfo()
4390
4391       for inst in inst_data.values():
4392         if inst.primary_node in node_to_primary:
4393           node_to_primary[inst.primary_node].add(inst.name)
4394         for secnode in inst.secondary_nodes:
4395           if secnode in node_to_secondary:
4396             node_to_secondary[secnode].add(inst.name)
4397     else:
4398       node_to_primary = None
4399       node_to_secondary = None
4400
4401     if query.NQ_OOB in self.requested_data:
4402       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4403                          for name, node in all_info.iteritems())
4404     else:
4405       oob_support = None
4406
4407     if query.NQ_GROUP in self.requested_data:
4408       groups = lu.cfg.GetAllNodeGroupsInfo()
4409     else:
4410       groups = {}
4411
4412     return query.NodeQueryData([all_info[name] for name in nodenames],
4413                                live_data, lu.cfg.GetMasterNode(),
4414                                node_to_primary, node_to_secondary, groups,
4415                                oob_support, lu.cfg.GetClusterInfo())
4416
4417
4418 class LUNodeQuery(NoHooksLU):
4419   """Logical unit for querying nodes.
4420
4421   """
4422   # pylint: disable=W0142
4423   REQ_BGL = False
4424
4425   def CheckArguments(self):
4426     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4427                          self.op.output_fields, self.op.use_locking)
4428
4429   def ExpandNames(self):
4430     self.nq.ExpandNames(self)
4431
4432   def Exec(self, feedback_fn):
4433     return self.nq.OldStyleQuery(self)
4434
4435
4436 class LUNodeQueryvols(NoHooksLU):
4437   """Logical unit for getting volumes on node(s).
4438
4439   """
4440   REQ_BGL = False
4441   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4442   _FIELDS_STATIC = utils.FieldSet("node")
4443
4444   def CheckArguments(self):
4445     _CheckOutputFields(static=self._FIELDS_STATIC,
4446                        dynamic=self._FIELDS_DYNAMIC,
4447                        selected=self.op.output_fields)
4448
4449   def ExpandNames(self):
4450     self.needed_locks = {}
4451     self.share_locks[locking.LEVEL_NODE] = 1
4452     if not self.op.nodes:
4453       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4454     else:
4455       self.needed_locks[locking.LEVEL_NODE] = \
4456         _GetWantedNodes(self, self.op.nodes)
4457
4458   def Exec(self, feedback_fn):
4459     """Computes the list of nodes and their attributes.
4460
4461     """
4462     nodenames = self.owned_locks(locking.LEVEL_NODE)
4463     volumes = self.rpc.call_node_volumes(nodenames)
4464
4465     ilist = self.cfg.GetAllInstancesInfo()
4466     vol2inst = _MapInstanceDisksToNodes(ilist.values())
4467
4468     output = []
4469     for node in nodenames:
4470       nresult = volumes[node]
4471       if nresult.offline:
4472         continue
4473       msg = nresult.fail_msg
4474       if msg:
4475         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4476         continue
4477
4478       node_vols = sorted(nresult.payload,
4479                          key=operator.itemgetter("dev"))
4480
4481       for vol in node_vols:
4482         node_output = []
4483         for field in self.op.output_fields:
4484           if field == "node":
4485             val = node
4486           elif field == "phys":
4487             val = vol["dev"]
4488           elif field == "vg":
4489             val = vol["vg"]
4490           elif field == "name":
4491             val = vol["name"]
4492           elif field == "size":
4493             val = int(float(vol["size"]))
4494           elif field == "instance":
4495             val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4496           else:
4497             raise errors.ParameterError(field)
4498           node_output.append(str(val))
4499
4500         output.append(node_output)
4501
4502     return output
4503
4504
4505 class LUNodeQueryStorage(NoHooksLU):
4506   """Logical unit for getting information on storage units on node(s).
4507
4508   """
4509   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4510   REQ_BGL = False
4511
4512   def CheckArguments(self):
4513     _CheckOutputFields(static=self._FIELDS_STATIC,
4514                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4515                        selected=self.op.output_fields)
4516
4517   def ExpandNames(self):
4518     self.needed_locks = {}
4519     self.share_locks[locking.LEVEL_NODE] = 1
4520
4521     if self.op.nodes:
4522       self.needed_locks[locking.LEVEL_NODE] = \
4523         _GetWantedNodes(self, self.op.nodes)
4524     else:
4525       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4526
4527   def Exec(self, feedback_fn):
4528     """Computes the list of nodes and their attributes.
4529
4530     """
4531     self.nodes = self.owned_locks(locking.LEVEL_NODE)
4532
4533     # Always get name to sort by
4534     if constants.SF_NAME in self.op.output_fields:
4535       fields = self.op.output_fields[:]
4536     else:
4537       fields = [constants.SF_NAME] + self.op.output_fields
4538
4539     # Never ask for node or type as it's only known to the LU
4540     for extra in [constants.SF_NODE, constants.SF_TYPE]:
4541       while extra in fields:
4542         fields.remove(extra)
4543
4544     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4545     name_idx = field_idx[constants.SF_NAME]
4546
4547     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4548     data = self.rpc.call_storage_list(self.nodes,
4549                                       self.op.storage_type, st_args,
4550                                       self.op.name, fields)
4551
4552     result = []
4553
4554     for node in utils.NiceSort(self.nodes):
4555       nresult = data[node]
4556       if nresult.offline:
4557         continue
4558
4559       msg = nresult.fail_msg
4560       if msg:
4561         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4562         continue
4563
4564       rows = dict([(row[name_idx], row) for row in nresult.payload])
4565
4566       for name in utils.NiceSort(rows.keys()):
4567         row = rows[name]
4568
4569         out = []
4570
4571         for field in self.op.output_fields:
4572           if field == constants.SF_NODE:
4573             val = node
4574           elif field == constants.SF_TYPE:
4575             val = self.op.storage_type
4576           elif field in field_idx:
4577             val = row[field_idx[field]]
4578           else:
4579             raise errors.ParameterError(field)
4580
4581           out.append(val)
4582
4583         result.append(out)
4584
4585     return result
4586
4587
4588 class _InstanceQuery(_QueryBase):
4589   FIELDS = query.INSTANCE_FIELDS
4590
4591   def ExpandNames(self, lu):
4592     lu.needed_locks = {}
4593     lu.share_locks = _ShareAll()
4594
4595     if self.names:
4596       self.wanted = _GetWantedInstances(lu, self.names)
4597     else:
4598       self.wanted = locking.ALL_SET
4599
4600     self.do_locking = (self.use_locking and
4601                        query.IQ_LIVE in self.requested_data)
4602     if self.do_locking:
4603       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4604       lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4605       lu.needed_locks[locking.LEVEL_NODE] = []
4606       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4607
4608     self.do_grouplocks = (self.do_locking and
4609                           query.IQ_NODES in self.requested_data)
4610
4611   def DeclareLocks(self, lu, level):
4612     if self.do_locking:
4613       if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4614         assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4615
4616         # Lock all groups used by instances optimistically; this requires going
4617         # via the node before it's locked, requiring verification later on
4618         lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4619           set(group_uuid
4620               for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4621               for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4622       elif level == locking.LEVEL_NODE:
4623         lu._LockInstancesNodes() # pylint: disable=W0212
4624
4625   @staticmethod
4626   def _CheckGroupLocks(lu):
4627     owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4628     owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4629
4630     # Check if node groups for locked instances are still correct
4631     for instance_name in owned_instances:
4632       _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4633
4634   def _GetQueryData(self, lu):
4635     """Computes the list of instances and their attributes.
4636
4637     """
4638     if self.do_grouplocks:
4639       self._CheckGroupLocks(lu)
4640
4641     cluster = lu.cfg.GetClusterInfo()
4642     all_info = lu.cfg.GetAllInstancesInfo()
4643
4644     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4645
4646     instance_list = [all_info[name] for name in instance_names]
4647     nodes = frozenset(itertools.chain(*(inst.all_nodes
4648                                         for inst in instance_list)))
4649     hv_list = list(set([inst.hypervisor for inst in instance_list]))
4650     bad_nodes = []
4651     offline_nodes = []
4652     wrongnode_inst = set()
4653
4654     # Gather data as requested
4655     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4656       live_data = {}
4657       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4658       for name in nodes:
4659         result = node_data[name]
4660         if result.offline:
4661           # offline nodes will be in both lists
4662           assert result.fail_msg
4663           offline_nodes.append(name)
4664         if result.fail_msg:
4665           bad_nodes.append(name)
4666         elif result.payload:
4667           for inst in result.payload:
4668             if inst in all_info:
4669               if all_info[inst].primary_node == name:
4670                 live_data.update(result.payload)
4671               else:
4672                 wrongnode_inst.add(inst)
4673             else:
4674               # orphan instance; we don't list it here as we don't
4675               # handle this case yet in the output of instance listing
4676               logging.warning("Orphan instance '%s' found on node %s",
4677                               inst, name)
4678         # else no instance is alive
4679     else:
4680       live_data = {}
4681
4682     if query.IQ_DISKUSAGE in self.requested_data:
4683       disk_usage = dict((inst.name,
4684                          _ComputeDiskSize(inst.disk_template,
4685                                           [{constants.IDISK_SIZE: disk.size}
4686                                            for disk in inst.disks]))
4687                         for inst in instance_list)
4688     else:
4689       disk_usage = None
4690
4691     if query.IQ_CONSOLE in self.requested_data:
4692       consinfo = {}
4693       for inst in instance_list:
4694         if inst.name in live_data:
4695           # Instance is running
4696           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4697         else:
4698           consinfo[inst.name] = None
4699       assert set(consinfo.keys()) == set(instance_names)
4700     else:
4701       consinfo = None
4702
4703     if query.IQ_NODES in self.requested_data:
4704       node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4705                                             instance_list)))
4706       nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4707       groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4708                     for uuid in set(map(operator.attrgetter("group"),
4709                                         nodes.values())))
4710     else:
4711       nodes = None
4712       groups = None
4713
4714     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4715                                    disk_usage, offline_nodes, bad_nodes,
4716                                    live_data, wrongnode_inst, consinfo,
4717                                    nodes, groups)
4718
4719
4720 class LUQuery(NoHooksLU):
4721   """Query for resources/items of a certain kind.
4722
4723   """
4724   # pylint: disable=W0142
4725   REQ_BGL = False
4726
4727   def CheckArguments(self):
4728     qcls = _GetQueryImplementation(self.op.what)
4729
4730     self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)
4731
4732   def ExpandNames(self):
4733     self.impl.ExpandNames(self)
4734
4735   def DeclareLocks(self, level):
4736     self.impl.DeclareLocks(self, level)
4737
4738   def Exec(self, feedback_fn):
4739     return self.impl.NewStyleQuery(self)
4740
4741
4742 class LUQueryFields(NoHooksLU):
4743   """Query for resources/items of a certain kind.
4744
4745   """
4746   # pylint: disable=W0142
4747   REQ_BGL = False
4748
4749   def CheckArguments(self):
4750     self.qcls = _GetQueryImplementation(self.op.what)
4751
4752   def ExpandNames(self):
4753     self.needed_locks = {}
4754
4755   def Exec(self, feedback_fn):
4756     return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4757
4758
4759 class LUNodeModifyStorage(NoHooksLU):
4760   """Logical unit for modifying a storage volume on a node.
4761
4762   """
4763   REQ_BGL = False
4764
4765   def CheckArguments(self):
4766     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4767
4768     storage_type = self.op.storage_type
4769
4770     try:
4771       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4772     except KeyError:
4773       raise errors.OpPrereqError("Storage units of type '%s' can not be"
4774                                  " modified" % storage_type,
4775                                  errors.ECODE_INVAL)
4776
4777     diff = set(self.op.changes.keys()) - modifiable
4778     if diff:
4779       raise errors.OpPrereqError("The following fields can not be modified for"
4780                                  " storage units of type '%s': %r" %
4781                                  (storage_type, list(diff)),
4782                                  errors.ECODE_INVAL)
4783
4784   def ExpandNames(self):
4785     self.needed_locks = {
4786       locking.LEVEL_NODE: self.op.node_name,
4787       }
4788
4789   def Exec(self, feedback_fn):
4790     """Computes the list of nodes and their attributes.
4791
4792     """
4793     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4794     result = self.rpc.call_storage_modify(self.op.node_name,
4795                                           self.op.storage_type, st_args,
4796                                           self.op.name, self.op.changes)
4797     result.Raise("Failed to modify storage unit '%s' on %s" %
4798                  (self.op.name, self.op.node_name))
4799
4800
4801 class LUNodeAdd(LogicalUnit):
4802   """Logical unit for adding node to the cluster.
4803
4804   """
4805   HPATH = "node-add"
4806   HTYPE = constants.HTYPE_NODE
4807   _NFLAGS = ["master_capable", "vm_capable"]
4808
4809   def CheckArguments(self):
4810     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4811     # validate/normalize the node name
4812     self.hostname = netutils.GetHostname(name=self.op.node_name,
4813                                          family=self.primary_ip_family)
4814     self.op.node_name = self.hostname.name
4815
4816     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4817       raise errors.OpPrereqError("Cannot readd the master node",
4818                                  errors.ECODE_STATE)
4819
4820     if self.op.readd and self.op.group:
4821       raise errors.OpPrereqError("Cannot pass a node group when a node is"
4822                                  " being readded", errors.ECODE_INVAL)
4823
4824   def BuildHooksEnv(self):
4825     """Build hooks env.
4826
4827     This will run on all nodes before, and on all nodes + the new node after.
4828
4829     """
4830     return {
4831       "OP_TARGET": self.op.node_name,
4832       "NODE_NAME": self.op.node_name,
4833       "NODE_PIP": self.op.primary_ip,
4834       "NODE_SIP": self.op.secondary_ip,
4835       "MASTER_CAPABLE": str(self.op.master_capable),
4836       "VM_CAPABLE": str(self.op.vm_capable),
4837       }
4838
4839   def BuildHooksNodes(self):
4840     """Build hooks nodes.
4841
4842     """
4843     # Exclude added node
4844     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4845     post_nodes = pre_nodes + [self.op.node_name, ]
4846
4847     return (pre_nodes, post_nodes)
4848
4849   def CheckPrereq(self):
4850     """Check prerequisites.
4851
4852     This checks:
4853      - the new node is not already in the config
4854      - it is resolvable
4855      - its parameters (single/dual homed) matches the cluster
4856
4857     Any errors are signaled by raising errors.OpPrereqError.
4858
4859     """
4860     cfg = self.cfg
4861     hostname = self.hostname
4862     node = hostname.name
4863     primary_ip = self.op.primary_ip = hostname.ip
4864     if self.op.secondary_ip is None:
4865       if self.primary_ip_family == netutils.IP6Address.family:
4866         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4867                                    " IPv4 address must be given as secondary",
4868                                    errors.ECODE_INVAL)
4869       self.op.secondary_ip = primary_ip
4870
4871     secondary_ip = self.op.secondary_ip
4872     if not netutils.IP4Address.IsValid(secondary_ip):
4873       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4874                                  " address" % secondary_ip, errors.ECODE_INVAL)
4875
4876     node_list = cfg.GetNodeList()
4877     if not self.op.readd and node in node_list:
4878       raise errors.OpPrereqError("Node %s is already in the configuration" %
4879                                  node, errors.ECODE_EXISTS)
4880     elif self.op.readd and node not in node_list:
4881       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4882                                  errors.ECODE_NOENT)
4883
4884     self.changed_primary_ip = False
4885
4886     for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4887       if self.op.readd and node == existing_node_name:
4888         if existing_node.secondary_ip != secondary_ip:
4889           raise errors.OpPrereqError("Readded node doesn't have the same IP"
4890                                      " address configuration as before",
4891                                      errors.ECODE_INVAL)
4892         if existing_node.primary_ip != primary_ip:
4893           self.changed_primary_ip = True
4894
4895         continue
4896
4897       if (existing_node.primary_ip == primary_ip or
4898           existing_node.secondary_ip == primary_ip or
4899           existing_node.primary_ip == secondary_ip or
4900           existing_node.secondary_ip == secondary_ip):
4901         raise errors.OpPrereqError("New node ip address(es) conflict with"
4902                                    " existing node %s" % existing_node.name,
4903                                    errors.ECODE_NOTUNIQUE)
4904
4905     # After this 'if' block, None is no longer a valid value for the
4906     # _capable op attributes
4907     if self.op.readd:
4908       old_node = self.cfg.GetNodeInfo(node)
4909       assert old_node is not None, "Can't retrieve locked node %s" % node
4910       for attr in self._NFLAGS:
4911         if getattr(self.op, attr) is None:
4912           setattr(self.op, attr, getattr(old_node, attr))
4913     else:
4914       for attr in self._NFLAGS:
4915         if getattr(self.op, attr) is None:
4916           setattr(self.op, attr, True)
4917
4918     if self.op.readd and not self.op.vm_capable:
4919       pri, sec = cfg.GetNodeInstances(node)
4920       if pri or sec:
4921         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4922                                    " flag set to false, but it already holds"
4923                                    " instances" % node,
4924                                    errors.ECODE_STATE)
4925
4926     # check that the type of the node (single versus dual homed) is the
4927     # same as for the master
4928     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4929     master_singlehomed = myself.secondary_ip == myself.primary_ip
4930     newbie_singlehomed = secondary_ip == primary_ip
4931     if master_singlehomed != newbie_singlehomed:
4932       if master_singlehomed:
4933         raise errors.OpPrereqError("The master has no secondary ip but the"
4934                                    " new node has one",
4935                                    errors.ECODE_INVAL)
4936       else:
4937         raise errors.OpPrereqError("The master has a secondary ip but the"
4938                                    " new node doesn't have one",
4939                                    errors.ECODE_INVAL)
4940
4941     # checks reachability
4942     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4943       raise errors.OpPrereqError("Node not reachable by ping",
4944                                  errors.ECODE_ENVIRON)
4945
4946     if not newbie_singlehomed:
4947       # check reachability from my secondary ip to newbie's secondary ip
4948       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4949                            source=myself.secondary_ip):
4950         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4951                                    " based ping to node daemon port",
4952                                    errors.ECODE_ENVIRON)
4953
4954     if self.op.readd:
4955       exceptions = [node]
4956     else:
4957       exceptions = []
4958
4959     if self.op.master_capable:
4960       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4961     else:
4962       self.master_candidate = False
4963
4964     if self.op.readd:
4965       self.new_node = old_node
4966     else:
4967       node_group = cfg.LookupNodeGroup(self.op.group)
4968       self.new_node = objects.Node(name=node,
4969                                    primary_ip=primary_ip,
4970                                    secondary_ip=secondary_ip,
4971                                    master_candidate=self.master_candidate,
4972                                    offline=False, drained=False,
4973                                    group=node_group)
4974
4975     if self.op.ndparams:
4976       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4977
4978   def Exec(self, feedback_fn):
4979     """Adds the new node to the cluster.
4980
4981     """
4982     new_node = self.new_node
4983     node = new_node.name
4984
4985     # We adding a new node so we assume it's powered
4986     new_node.powered = True
4987
4988     # for re-adds, reset the offline/drained/master-candidate flags;
4989     # we need to reset here, otherwise offline would prevent RPC calls
4990     # later in the procedure; this also means that if the re-add
4991     # fails, we are left with a non-offlined, broken node
4992     if self.op.readd:
4993       new_node.drained = new_node.offline = False # pylint: disable=W0201
4994       self.LogInfo("Readding a node, the offline/drained flags were reset")
4995       # if we demote the node, we do cleanup later in the procedure
4996       new_node.master_candidate = self.master_candidate
4997       if self.changed_primary_ip:
4998         new_node.primary_ip = self.op.primary_ip
4999
5000     # copy the master/vm_capable flags
5001     for attr in self._NFLAGS:
5002       setattr(new_node, attr, getattr(self.op, attr))
5003
5004     # notify the user about any possible mc promotion
5005     if new_node.master_candidate:
5006       self.LogInfo("Node will be a master candidate")
5007
5008     if self.op.ndparams:
5009       new_node.ndparams = self.op.ndparams
5010     else:
5011       new_node.ndparams = {}
5012
5013     # check connectivity
5014     result = self.rpc.call_version([node])[node]
5015     result.Raise("Can't get version information from node %s" % node)
5016     if constants.PROTOCOL_VERSION == result.payload:
5017       logging.info("Communication to node %s fine, sw version %s match",
5018                    node, result.payload)
5019     else:
5020       raise errors.OpExecError("Version mismatch master version %s,"
5021                                " node version %s" %
5022                                (constants.PROTOCOL_VERSION, result.payload))
5023
5024     # Add node to our /etc/hosts, and add key to known_hosts
5025     if self.cfg.GetClusterInfo().modify_etc_hosts:
5026       master_node = self.cfg.GetMasterNode()
5027       result = self.rpc.call_etc_hosts_modify(master_node,
5028                                               constants.ETC_HOSTS_ADD,
5029                                               self.hostname.name,
5030                                               self.hostname.ip)
5031       result.Raise("Can't update hosts file with new host data")
5032
5033     if new_node.secondary_ip != new_node.primary_ip:
5034       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5035                                False)
5036
5037     node_verify_list = [self.cfg.GetMasterNode()]
5038     node_verify_param = {
5039       constants.NV_NODELIST: [node],
5040       # TODO: do a node-net-test as well?
5041     }
5042
5043     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5044                                        self.cfg.GetClusterName())
5045     for verifier in node_verify_list:
5046       result[verifier].Raise("Cannot communicate with node %s" % verifier)
5047       nl_payload = result[verifier].payload[constants.NV_NODELIST]
5048       if nl_payload:
5049         for failed in nl_payload:
5050           feedback_fn("ssh/hostname verification failed"
5051                       " (checking from %s): %s" %
5052                       (verifier, nl_payload[failed]))
5053         raise errors.OpExecError("ssh/hostname verification failed")
5054
5055     if self.op.readd:
5056       _RedistributeAncillaryFiles(self)
5057       self.context.ReaddNode(new_node)
5058       # make sure we redistribute the config
5059       self.cfg.Update(new_node, feedback_fn)
5060       # and make sure the new node will not have old files around
5061       if not new_node.master_candidate:
5062         result = self.rpc.call_node_demote_from_mc(new_node.name)
5063         msg = result.fail_msg
5064         if msg:
5065           self.LogWarning("Node failed to demote itself from master"
5066                           " candidate status: %s" % msg)
5067     else:
5068       _RedistributeAncillaryFiles(self, additional_nodes=[node],
5069                                   additional_vm=self.op.vm_capable)
5070       self.context.AddNode(new_node, self.proc.GetECId())
5071
5072
5073 class LUNodeSetParams(LogicalUnit):
5074   """Modifies the parameters of a node.
5075
5076   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5077       to the node role (as _ROLE_*)
5078   @cvar _R2F: a dictionary from node role to tuples of flags
5079   @cvar _FLAGS: a list of attribute names corresponding to the flags
5080
5081   """
5082   HPATH = "node-modify"
5083   HTYPE = constants.HTYPE_NODE
5084   REQ_BGL = False
5085   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5086   _F2R = {
5087     (True, False, False): _ROLE_CANDIDATE,
5088     (False, True, False): _ROLE_DRAINED,
5089     (False, False, True): _ROLE_OFFLINE,
5090     (False, False, False): _ROLE_REGULAR,
5091     }
5092   _R2F = dict((v, k) for k, v in _F2R.items())
5093   _FLAGS = ["master_candidate", "drained", "offline"]
5094
5095   def CheckArguments(self):
5096     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5097     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5098                 self.op.master_capable, self.op.vm_capable,
5099                 self.op.secondary_ip, self.op.ndparams]
5100     if all_mods.count(None) == len(all_mods):
5101       raise errors.OpPrereqError("Please pass at least one modification",
5102                                  errors.ECODE_INVAL)
5103     if all_mods.count(True) > 1:
5104       raise errors.OpPrereqError("Can't set the node into more than one"
5105                                  " state at the same time",
5106                                  errors.ECODE_INVAL)
5107
5108     # Boolean value that tells us whether we might be demoting from MC
5109     self.might_demote = (self.op.master_candidate == False or
5110                          self.op.offline == True or
5111                          self.op.drained == True or
5112                          self.op.master_capable == False)
5113
5114     if self.op.secondary_ip:
5115       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5116         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5117                                    " address" % self.op.secondary_ip,
5118                                    errors.ECODE_INVAL)
5119
5120     self.lock_all = self.op.auto_promote and self.might_demote
5121     self.lock_instances = self.op.secondary_ip is not None
5122
5123   def ExpandNames(self):
5124     if self.lock_all:
5125       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5126     else:
5127       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5128
5129     if self.lock_instances:
5130       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5131
5132   def DeclareLocks(self, level):
5133     # If we have locked all instances, before waiting to lock nodes, release
5134     # all the ones living on nodes unrelated to the current operation.
5135     if level == locking.LEVEL_NODE and self.lock_instances:
5136       self.affected_instances = []
5137       if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5138         instances_keep = []
5139
5140         # Build list of instances to release
5141         locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5142         for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5143           if (instance.disk_template in constants.DTS_INT_MIRROR and
5144               self.op.node_name in instance.all_nodes):
5145             instances_keep.append(instance_name)
5146             self.affected_instances.append(instance)
5147
5148         _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5149
5150         assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5151                 set(instances_keep))
5152
5153   def BuildHooksEnv(self):
5154     """Build hooks env.
5155
5156     This runs on the master node.
5157
5158     """
5159     return {
5160       "OP_TARGET": self.op.node_name,
5161       "MASTER_CANDIDATE": str(self.op.master_candidate),
5162       "OFFLINE": str(self.op.offline),
5163       "DRAINED": str(self.op.drained),
5164       "MASTER_CAPABLE": str(self.op.master_capable),
5165       "VM_CAPABLE": str(self.op.vm_capable),
5166       }
5167
5168   def BuildHooksNodes(self):
5169     """Build hooks nodes.
5170
5171     """
5172     nl = [self.cfg.GetMasterNode(), self.op.node_name]
5173     return (nl, nl)
5174
5175   def CheckPrereq(self):
5176     """Check prerequisites.
5177
5178     This only checks the instance list against the existing names.
5179
5180     """
5181     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5182
5183     if (self.op.master_candidate is not None or
5184         self.op.drained is not None or
5185         self.op.offline is not None):
5186       # we can't change the master's node flags
5187       if self.op.node_name == self.cfg.GetMasterNode():
5188         raise errors.OpPrereqError("The master role can be changed"
5189                                    " only via master-failover",
5190                                    errors.ECODE_INVAL)
5191
5192     if self.op.master_candidate and not node.master_capable:
5193       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5194                                  " it a master candidate" % node.name,
5195                                  errors.ECODE_STATE)
5196
5197     if self.op.vm_capable == False:
5198       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5199       if ipri or isec:
5200         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5201                                    " the vm_capable flag" % node.name,
5202                                    errors.ECODE_STATE)
5203
5204     if node.master_candidate and self.might_demote and not self.lock_all:
5205       assert not self.op.auto_promote, "auto_promote set but lock_all not"
5206       # check if after removing the current node, we're missing master
5207       # candidates
5208       (mc_remaining, mc_should, _) = \
5209           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5210       if mc_remaining < mc_should:
5211         raise errors.OpPrereqError("Not enough master candidates, please"
5212                                    " pass auto promote option to allow"
5213                                    " promotion", errors.ECODE_STATE)
5214
5215     self.old_flags = old_flags = (node.master_candidate,
5216                                   node.drained, node.offline)
5217     assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5218     self.old_role = old_role = self._F2R[old_flags]
5219
5220     # Check for ineffective changes
5221     for attr in self._FLAGS:
5222       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5223         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5224         setattr(self.op, attr, None)
5225
5226     # Past this point, any flag change to False means a transition
5227     # away from the respective state, as only real changes are kept
5228
5229     # TODO: We might query the real power state if it supports OOB
5230     if _SupportsOob(self.cfg, node):
5231       if self.op.offline is False and not (node.powered or
5232                                            self.op.powered == True):
5233         raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5234                                     " offline status can be reset") %
5235                                    self.op.node_name)
5236     elif self.op.powered is not None:
5237       raise errors.OpPrereqError(("Unable to change powered state for node %s"
5238                                   " as it does not support out-of-band"
5239                                   " handling") % self.op.node_name)
5240
5241     # If we're being deofflined/drained, we'll MC ourself if needed
5242     if (self.op.drained == False or self.op.offline == False or
5243         (self.op.master_capable and not node.master_capable)):
5244       if _DecideSelfPromotion(self):
5245         self.op.master_candidate = True
5246         self.LogInfo("Auto-promoting node to master candidate")
5247
5248     # If we're no longer master capable, we'll demote ourselves from MC
5249     if self.op.master_capable == False and node.master_candidate:
5250       self.LogInfo("Demoting from master candidate")
5251       self.op.master_candidate = False
5252
5253     # Compute new role
5254     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5255     if self.op.master_candidate:
5256       new_role = self._ROLE_CANDIDATE
5257     elif self.op.drained:
5258       new_role = self._ROLE_DRAINED
5259     elif self.op.offline:
5260       new_role = self._ROLE_OFFLINE
5261     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5262       # False is still in new flags, which means we're un-setting (the
5263       # only) True flag
5264       new_role = self._ROLE_REGULAR
5265     else: # no new flags, nothing, keep old role
5266       new_role = old_role
5267
5268     self.new_role = new_role
5269
5270     if old_role == self._ROLE_OFFLINE and new_role != old_role:
5271       # Trying to transition out of offline status
5272       result = self.rpc.call_version([node.name])[node.name]
5273       if result.fail_msg:
5274         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5275                                    " to report its version: %s" %
5276                                    (node.name, result.fail_msg),
5277                                    errors.ECODE_STATE)
5278       else:
5279         self.LogWarning("Transitioning node from offline to online state"
5280                         " without using re-add. Please make sure the node"
5281                         " is healthy!")
5282
5283     if self.op.secondary_ip:
5284       # Ok even without locking, because this can't be changed by any LU
5285       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5286       master_singlehomed = master.secondary_ip == master.primary_ip
5287       if master_singlehomed and self.op.secondary_ip:
5288         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5289                                    " homed cluster", errors.ECODE_INVAL)
5290
5291       if node.offline:
5292         if self.affected_instances:
5293           raise errors.OpPrereqError("Cannot change secondary ip: offline"
5294                                      " node has instances (%s) configured"
5295                                      " to use it" % self.affected_instances)
5296       else:
5297         # On online nodes, check that no instances are running, and that
5298         # the node has the new ip and we can reach it.
5299         for instance in self.affected_instances:
5300           _CheckInstanceDown(self, instance, "cannot change secondary ip")
5301
5302         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5303         if master.name != node.name:
5304           # check reachability from master secondary ip to new secondary ip
5305           if not netutils.TcpPing(self.op.secondary_ip,
5306                                   constants.DEFAULT_NODED_PORT,
5307                                   source=master.secondary_ip):
5308             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5309                                        " based ping to node daemon port",
5310                                        errors.ECODE_ENVIRON)
5311
5312     if self.op.ndparams:
5313       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5314       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5315       self.new_ndparams = new_ndparams
5316
5317   def Exec(self, feedback_fn):
5318     """Modifies a node.
5319
5320     """
5321     node = self.node
5322     old_role = self.old_role
5323     new_role = self.new_role
5324
5325     result = []
5326
5327     if self.op.ndparams:
5328       node.ndparams = self.new_ndparams
5329
5330     if self.op.powered is not None:
5331       node.powered = self.op.powered
5332
5333     for attr in ["master_capable", "vm_capable"]:
5334       val = getattr(self.op, attr)
5335       if val is not None:
5336         setattr(node, attr, val)
5337         result.append((attr, str(val)))
5338
5339     if new_role != old_role:
5340       # Tell the node to demote itself, if no longer MC and not offline
5341       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5342         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5343         if msg:
5344           self.LogWarning("Node failed to demote itself: %s", msg)
5345
5346       new_flags = self._R2F[new_role]
5347       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5348         if of != nf:
5349           result.append((desc, str(nf)))
5350       (node.master_candidate, node.drained, node.offline) = new_flags
5351
5352       # we locked all nodes, we adjust the CP before updating this node
5353       if self.lock_all:
5354         _AdjustCandidatePool(self, [node.name])
5355
5356     if self.op.secondary_ip:
5357       node.secondary_ip = self.op.secondary_ip
5358       result.append(("secondary_ip", self.op.secondary_ip))
5359
5360     # this will trigger configuration file update, if needed
5361     self.cfg.Update(node, feedback_fn)
5362
5363     # this will trigger job queue propagation or cleanup if the mc
5364     # flag changed
5365     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5366       self.context.ReaddNode(node)
5367
5368     return result
5369
5370
5371 class LUNodePowercycle(NoHooksLU):
5372   """Powercycles a node.
5373
5374   """
5375   REQ_BGL = False
5376
5377   def CheckArguments(self):
5378     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5379     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5380       raise errors.OpPrereqError("The node is the master and the force"
5381                                  " parameter was not set",
5382                                  errors.ECODE_INVAL)
5383
5384   def ExpandNames(self):
5385     """Locking for PowercycleNode.
5386
5387     This is a last-resort option and shouldn't block on other
5388     jobs. Therefore, we grab no locks.
5389
5390     """
5391     self.needed_locks = {}
5392
5393   def Exec(self, feedback_fn):
5394     """Reboots a node.
5395
5396     """
5397     result = self.rpc.call_node_powercycle(self.op.node_name,
5398                                            self.cfg.GetHypervisorType())
5399     result.Raise("Failed to schedule the reboot")
5400     return result.payload
5401
5402
5403 class LUClusterQuery(NoHooksLU):
5404   """Query cluster configuration.
5405
5406   """
5407   REQ_BGL = False
5408
5409   def ExpandNames(self):
5410     self.needed_locks = {}
5411
5412   def Exec(self, feedback_fn):
5413     """Return cluster config.
5414
5415     """
5416     cluster = self.cfg.GetClusterInfo()
5417     os_hvp = {}
5418
5419     # Filter just for enabled hypervisors
5420     for os_name, hv_dict in cluster.os_hvp.items():
5421       os_hvp[os_name] = {}
5422       for hv_name, hv_params in hv_dict.items():
5423         if hv_name in cluster.enabled_hypervisors:
5424           os_hvp[os_name][hv_name] = hv_params
5425
5426     # Convert ip_family to ip_version
5427     primary_ip_version = constants.IP4_VERSION
5428     if cluster.primary_ip_family == netutils.IP6Address.family:
5429       primary_ip_version = constants.IP6_VERSION
5430
5431     result = {
5432       "software_version": constants.RELEASE_VERSION,
5433       "protocol_version": constants.PROTOCOL_VERSION,
5434       "config_version": constants.CONFIG_VERSION,
5435       "os_api_version": max(constants.OS_API_VERSIONS),
5436       "export_version": constants.EXPORT_VERSION,
5437       "architecture": (platform.architecture()[0], platform.machine()),
5438       "name": cluster.cluster_name,
5439       "master": cluster.master_node,
5440       "default_hypervisor": cluster.enabled_hypervisors[0],
5441       "enabled_hypervisors": cluster.enabled_hypervisors,
5442       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5443                         for hypervisor_name in cluster.enabled_hypervisors]),
5444       "os_hvp": os_hvp,
5445       "beparams": cluster.beparams,
5446       "osparams": cluster.osparams,
5447       "nicparams": cluster.nicparams,
5448       "ndparams": cluster.ndparams,
5449       "candidate_pool_size": cluster.candidate_pool_size,
5450       "master_netdev": cluster.master_netdev,
5451       "volume_group_name": cluster.volume_group_name,
5452       "drbd_usermode_helper": cluster.drbd_usermode_helper,
5453       "file_storage_dir": cluster.file_storage_dir,
5454       "shared_file_storage_dir": cluster.shared_file_storage_dir,
5455       "maintain_node_health": cluster.maintain_node_health,
5456       "ctime": cluster.ctime,
5457       "mtime": cluster.mtime,
5458       "uuid": cluster.uuid,
5459       "tags": list(cluster.GetTags()),
5460       "uid_pool": cluster.uid_pool,
5461       "default_iallocator": cluster.default_iallocator,
5462       "reserved_lvs": cluster.reserved_lvs,
5463       "primary_ip_version": primary_ip_version,
5464       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5465       "hidden_os": cluster.hidden_os,
5466       "blacklisted_os": cluster.blacklisted_os,
5467       }
5468
5469     return result
5470
5471
5472 class LUClusterConfigQuery(NoHooksLU):
5473   """Return configuration values.
5474
5475   """
5476   REQ_BGL = False
5477   _FIELDS_DYNAMIC = utils.FieldSet()
5478   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5479                                   "watcher_pause", "volume_group_name")
5480
5481   def CheckArguments(self):
5482     _CheckOutputFields(static=self._FIELDS_STATIC,
5483                        dynamic=self._FIELDS_DYNAMIC,
5484                        selected=self.op.output_fields)
5485
5486   def ExpandNames(self):
5487     self.needed_locks = {}
5488
5489   def Exec(self, feedback_fn):
5490     """Dump a representation of the cluster config to the standard output.
5491
5492     """
5493     values = []
5494     for field in self.op.output_fields:
5495       if field == "cluster_name":
5496         entry = self.cfg.GetClusterName()
5497       elif field == "master_node":
5498         entry = self.cfg.GetMasterNode()
5499       elif field == "drain_flag":
5500         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5501       elif field == "watcher_pause":
5502         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5503       elif field == "volume_group_name":
5504         entry = self.cfg.GetVGName()
5505       else:
5506         raise errors.ParameterError(field)
5507       values.append(entry)
5508     return values
5509
5510
5511 class LUInstanceActivateDisks(NoHooksLU):
5512   """Bring up an instance's disks.
5513
5514   """
5515   REQ_BGL = False
5516
5517   def ExpandNames(self):
5518     self._ExpandAndLockInstance()
5519     self.needed_locks[locking.LEVEL_NODE] = []
5520     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5521
5522   def DeclareLocks(self, level):
5523     if level == locking.LEVEL_NODE:
5524       self._LockInstancesNodes()
5525
5526   def CheckPrereq(self):
5527     """Check prerequisites.
5528
5529     This checks that the instance is in the cluster.
5530
5531     """
5532     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5533     assert self.instance is not None, \
5534       "Cannot retrieve locked instance %s" % self.op.instance_name
5535     _CheckNodeOnline(self, self.instance.primary_node)
5536
5537   def Exec(self, feedback_fn):
5538     """Activate the disks.
5539
5540     """
5541     disks_ok, disks_info = \
5542               _AssembleInstanceDisks(self, self.instance,
5543                                      ignore_size=self.op.ignore_size)
5544     if not disks_ok:
5545       raise errors.OpExecError("Cannot activate block devices")
5546
5547     return disks_info
5548
5549
5550 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5551                            ignore_size=False):
5552   """Prepare the block devices for an instance.
5553
5554   This sets up the block devices on all nodes.
5555
5556   @type lu: L{LogicalUnit}
5557   @param lu: the logical unit on whose behalf we execute
5558   @type instance: L{objects.Instance}
5559   @param instance: the instance for whose disks we assemble
5560   @type disks: list of L{objects.Disk} or None
5561   @param disks: which disks to assemble (or all, if None)
5562   @type ignore_secondaries: boolean
5563   @param ignore_secondaries: if true, errors on secondary nodes
5564       won't result in an error return from the function
5565   @type ignore_size: boolean
5566   @param ignore_size: if true, the current known size of the disk
5567       will not be used during the disk activation, useful for cases
5568       when the size is wrong
5569   @return: False if the operation failed, otherwise a list of
5570       (host, instance_visible_name, node_visible_name)
5571       with the mapping from node devices to instance devices
5572
5573   """
5574   device_info = []
5575   disks_ok = True
5576   iname = instance.name
5577   disks = _ExpandCheckDisks(instance, disks)
5578
5579   # With the two passes mechanism we try to reduce the window of
5580   # opportunity for the race condition of switching DRBD to primary
5581   # before handshaking occured, but we do not eliminate it
5582
5583   # The proper fix would be to wait (with some limits) until the
5584   # connection has been made and drbd transitions from WFConnection
5585   # into any other network-connected state (Connected, SyncTarget,
5586   # SyncSource, etc.)
5587
5588   # 1st pass, assemble on all nodes in secondary mode
5589   for idx, inst_disk in enumerate(disks):
5590     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5591       if ignore_size:
5592         node_disk = node_disk.Copy()
5593         node_disk.UnsetSize()
5594       lu.cfg.SetDiskID(node_disk, node)
5595       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5596       msg = result.fail_msg
5597       if msg:
5598         lu.proc.LogWarning("Could not prepare block device %s on node %s"
5599                            " (is_primary=False, pass=1): %s",
5600                            inst_disk.iv_name, node, msg)
5601         if not ignore_secondaries:
5602           disks_ok = False
5603
5604   # FIXME: race condition on drbd migration to primary
5605
5606   # 2nd pass, do only the primary node
5607   for idx, inst_disk in enumerate(disks):
5608     dev_path = None
5609
5610     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5611       if node != instance.primary_node:
5612         continue
5613       if ignore_size:
5614         node_disk = node_disk.Copy()
5615         node_disk.UnsetSize()
5616       lu.cfg.SetDiskID(node_disk, node)
5617       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5618       msg = result.fail_msg
5619       if msg:
5620         lu.proc.LogWarning("Could not prepare block device %s on node %s"
5621                            " (is_primary=True, pass=2): %s",
5622                            inst_disk.iv_name, node, msg)
5623         disks_ok = False
5624       else:
5625         dev_path = result.payload
5626
5627     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5628
5629   # leave the disks configured for the primary node
5630   # this is a workaround that would be fixed better by
5631   # improving the logical/physical id handling
5632   for disk in disks:
5633     lu.cfg.SetDiskID(disk, instance.primary_node)
5634
5635   return disks_ok, device_info
5636
5637
5638 def _StartInstanceDisks(lu, instance, force):
5639   """Start the disks of an instance.
5640
5641   """
5642   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5643                                            ignore_secondaries=force)
5644   if not disks_ok:
5645     _ShutdownInstanceDisks(lu, instance)
5646     if force is not None and not force:
5647       lu.proc.LogWarning("", hint="If the message above refers to a"
5648                          " secondary node,"
5649                          " you can retry the operation using '--force'.")
5650     raise errors.OpExecError("Disk consistency error")
5651
5652
5653 class LUInstanceDeactivateDisks(NoHooksLU):
5654   """Shutdown an instance's disks.
5655
5656   """
5657   REQ_BGL = False
5658
5659   def ExpandNames(self):
5660     self._ExpandAndLockInstance()
5661     self.needed_locks[locking.LEVEL_NODE] = []
5662     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5663
5664   def DeclareLocks(self, level):
5665     if level == locking.LEVEL_NODE:
5666       self._LockInstancesNodes()
5667
5668   def CheckPrereq(self):
5669     """Check prerequisites.
5670
5671     This checks that the instance is in the cluster.
5672
5673     """
5674     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5675     assert self.instance is not None, \
5676       "Cannot retrieve locked instance %s" % self.op.instance_name
5677
5678   def Exec(self, feedback_fn):
5679     """Deactivate the disks
5680
5681     """
5682     instance = self.instance
5683     if self.op.force:
5684       _ShutdownInstanceDisks(self, instance)
5685     else:
5686       _SafeShutdownInstanceDisks(self, instance)
5687
5688
5689 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5690   """Shutdown block devices of an instance.
5691
5692   This function checks if an instance is running, before calling
5693   _ShutdownInstanceDisks.
5694
5695   """
5696   _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5697   _ShutdownInstanceDisks(lu, instance, disks=disks)
5698
5699
5700 def _ExpandCheckDisks(instance, disks):
5701   """Return the instance disks selected by the disks list
5702
5703   @type disks: list of L{objects.Disk} or None
5704   @param disks: selected disks
5705   @rtype: list of L{objects.Disk}
5706   @return: selected instance disks to act on
5707
5708   """
5709   if disks is None:
5710     return instance.disks
5711   else:
5712     if not set(disks).issubset(instance.disks):
5713       raise errors.ProgrammerError("Can only act on disks belonging to the"
5714                                    " target instance")
5715     return disks
5716
5717
5718 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5719   """Shutdown block devices of an instance.
5720
5721   This does the shutdown on all nodes of the instance.
5722
5723   If the ignore_primary is false, errors on the primary node are
5724   ignored.
5725
5726   """
5727   all_result = True
5728   disks = _ExpandCheckDisks(instance, disks)
5729
5730   for disk in disks:
5731     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5732       lu.cfg.SetDiskID(top_disk, node)
5733       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5734       msg = result.fail_msg
5735       if msg:
5736         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5737                       disk.iv_name, node, msg)
5738         if ((node == instance.primary_node and not ignore_primary) or
5739             (node != instance.primary_node and not result.offline)):
5740           all_result = False
5741   return all_result
5742
5743
5744 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5745   """Checks if a node has enough free memory.
5746
5747   This function check if a given node has the needed amount of free
5748   memory. In case the node has less memory or we cannot get the
5749   information from the node, this function raise an OpPrereqError
5750   exception.
5751
5752   @type lu: C{LogicalUnit}
5753   @param lu: a logical unit from which we get configuration data
5754   @type node: C{str}
5755   @param node: the node to check
5756   @type reason: C{str}
5757   @param reason: string to use in the error message
5758   @type requested: C{int}
5759   @param requested: the amount of memory in MiB to check for
5760   @type hypervisor_name: C{str}
5761   @param hypervisor_name: the hypervisor to ask for memory stats
5762   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5763       we cannot check the node
5764
5765   """
5766   nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5767   nodeinfo[node].Raise("Can't get data from node %s" % node,
5768                        prereq=True, ecode=errors.ECODE_ENVIRON)
5769   free_mem = nodeinfo[node].payload.get("memory_free", None)
5770   if not isinstance(free_mem, int):
5771     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5772                                " was '%s'" % (node, free_mem),
5773                                errors.ECODE_ENVIRON)
5774   if requested > free_mem:
5775     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5776                                " needed %s MiB, available %s MiB" %
5777                                (node, reason, requested, free_mem),
5778                                errors.ECODE_NORES)
5779
5780
5781 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5782   """Checks if nodes have enough free disk space in the all VGs.
5783
5784   This function check if all given nodes have the needed amount of
5785   free disk. In case any node has less disk or we cannot get the
5786   information from the node, this function raise an OpPrereqError
5787   exception.
5788
5789   @type lu: C{LogicalUnit}
5790   @param lu: a logical unit from which we get configuration data
5791   @type nodenames: C{list}
5792   @param nodenames: the list of node names to check
5793   @type req_sizes: C{dict}
5794   @param req_sizes: the hash of vg and corresponding amount of disk in
5795       MiB to check for
5796   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5797       or we cannot check the node
5798
5799   """
5800   for vg, req_size in req_sizes.items():
5801     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5802
5803
5804 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5805   """Checks if nodes have enough free disk space in the specified VG.
5806
5807   This function check if all given nodes have the needed amount of
5808   free disk. In case any node has less disk or we cannot get the
5809   information from the node, this function raise an OpPrereqError
5810   exception.
5811
5812   @type lu: C{LogicalUnit}
5813   @param lu: a logical unit from which we get configuration data
5814   @type nodenames: C{list}
5815   @param nodenames: the list of node names to check
5816   @type vg: C{str}
5817   @param vg: the volume group to check
5818   @type requested: C{int}
5819   @param requested: the amount of disk in MiB to check for
5820   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5821       or we cannot check the node
5822
5823   """
5824   nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5825   for node in nodenames:
5826     info = nodeinfo[node]
5827     info.Raise("Cannot get current information from node %s" % node,
5828                prereq=True, ecode=errors.ECODE_ENVIRON)
5829     vg_free = info.payload.get("vg_free", None)
5830     if not isinstance(vg_free, int):
5831       raise errors.OpPrereqError("Can't compute free disk space on node"
5832                                  " %s for vg %s, result was '%s'" %
5833                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
5834     if requested > vg_free:
5835       raise errors.OpPrereqError("Not enough disk space on target node %s"
5836                                  " vg %s: required %d MiB, available %d MiB" %
5837                                  (node, vg, requested, vg_free),
5838                                  errors.ECODE_NORES)
5839
5840
5841 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
5842   """Checks if nodes have enough physical CPUs
5843
5844   This function checks if all given nodes have the needed number of
5845   physical CPUs. In case any node has less CPUs or we cannot get the
5846   information from the node, this function raises an OpPrereqError
5847   exception.
5848
5849   @type lu: C{LogicalUnit}
5850   @param lu: a logical unit from which we get configuration data
5851   @type nodenames: C{list}
5852   @param nodenames: the list of node names to check
5853   @type requested: C{int}
5854   @param requested: the minimum acceptable number of physical CPUs
5855   @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
5856       or we cannot check the node
5857
5858   """
5859   nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
5860   for node in nodenames:
5861     info = nodeinfo[node]
5862     info.Raise("Cannot get current information from node %s" % node,
5863                prereq=True, ecode=errors.ECODE_ENVIRON)
5864     num_cpus = info.payload.get("cpu_total", None)
5865     if not isinstance(num_cpus, int):
5866       raise errors.OpPrereqError("Can't compute the number of physical CPUs"
5867                                  " on node %s, result was '%s'" %
5868                                  (node, num_cpus), errors.ECODE_ENVIRON)
5869     if requested > num_cpus:
5870       raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
5871                                  "required" % (node, num_cpus, requested),
5872                                  errors.ECODE_NORES)
5873
5874
5875 class LUInstanceStartup(LogicalUnit):
5876   """Starts an instance.
5877
5878   """
5879   HPATH = "instance-start"
5880   HTYPE = constants.HTYPE_INSTANCE
5881   REQ_BGL = False
5882
5883   def CheckArguments(self):
5884     # extra beparams
5885     if self.op.beparams:
5886       # fill the beparams dict
5887       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5888
5889   def ExpandNames(self):
5890     self._ExpandAndLockInstance()
5891
5892   def BuildHooksEnv(self):
5893     """Build hooks env.
5894
5895     This runs on master, primary and secondary nodes of the instance.
5896
5897     """
5898     env = {
5899       "FORCE": self.op.force,
5900       }
5901
5902     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5903
5904     return env
5905
5906   def BuildHooksNodes(self):
5907     """Build hooks nodes.
5908
5909     """
5910     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5911     return (nl, nl)
5912
5913   def CheckPrereq(self):
5914     """Check prerequisites.
5915
5916     This checks that the instance is in the cluster.
5917
5918     """
5919     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5920     assert self.instance is not None, \
5921       "Cannot retrieve locked instance %s" % self.op.instance_name
5922
5923     # extra hvparams
5924     if self.op.hvparams:
5925       # check hypervisor parameter syntax (locally)
5926       cluster = self.cfg.GetClusterInfo()
5927       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5928       filled_hvp = cluster.FillHV(instance)
5929       filled_hvp.update(self.op.hvparams)
5930       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5931       hv_type.CheckParameterSyntax(filled_hvp)
5932       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5933
5934     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5935
5936     if self.primary_offline and self.op.ignore_offline_nodes:
5937       self.proc.LogWarning("Ignoring offline primary node")
5938
5939       if self.op.hvparams or self.op.beparams:
5940         self.proc.LogWarning("Overridden parameters are ignored")
5941     else:
5942       _CheckNodeOnline(self, instance.primary_node)
5943
5944       bep = self.cfg.GetClusterInfo().FillBE(instance)
5945
5946       # check bridges existence
5947       _CheckInstanceBridgesExist(self, instance)
5948
5949       remote_info = self.rpc.call_instance_info(instance.primary_node,
5950                                                 instance.name,
5951                                                 instance.hypervisor)
5952       remote_info.Raise("Error checking node %s" % instance.primary_node,
5953                         prereq=True, ecode=errors.ECODE_ENVIRON)
5954       if not remote_info.payload: # not running already
5955         _CheckNodeFreeMemory(self, instance.primary_node,
5956                              "starting instance %s" % instance.name,
5957                              bep[constants.BE_MEMORY], instance.hypervisor)
5958
5959   def Exec(self, feedback_fn):
5960     """Start the instance.
5961
5962     """
5963     instance = self.instance
5964     force = self.op.force
5965
5966     if not self.op.no_remember:
5967       self.cfg.MarkInstanceUp(instance.name)
5968
5969     if self.primary_offline:
5970       assert self.op.ignore_offline_nodes
5971       self.proc.LogInfo("Primary node offline, marked instance as started")
5972     else:
5973       node_current = instance.primary_node
5974
5975       _StartInstanceDisks(self, instance, force)
5976
5977       result = self.rpc.call_instance_start(node_current, instance,
5978                                             self.op.hvparams, self.op.beparams,
5979                                             self.op.startup_paused)
5980       msg = result.fail_msg
5981       if msg:
5982         _ShutdownInstanceDisks(self, instance)
5983         raise errors.OpExecError("Could not start instance: %s" % msg)
5984
5985
5986 class LUInstanceReboot(LogicalUnit):
5987   """Reboot an instance.
5988
5989   """
5990   HPATH = "instance-reboot"
5991   HTYPE = constants.HTYPE_INSTANCE
5992   REQ_BGL = False
5993
5994   def ExpandNames(self):
5995     self._ExpandAndLockInstance()
5996
5997   def BuildHooksEnv(self):
5998     """Build hooks env.
5999
6000     This runs on master, primary and secondary nodes of the instance.
6001
6002     """
6003     env = {
6004       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6005       "REBOOT_TYPE": self.op.reboot_type,
6006       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6007       }
6008
6009     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6010
6011     return env
6012
6013   def BuildHooksNodes(self):
6014     """Build hooks nodes.
6015
6016     """
6017     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6018     return (nl, nl)
6019
6020   def CheckPrereq(self):
6021     """Check prerequisites.
6022
6023     This checks that the instance is in the cluster.
6024
6025     """
6026     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6027     assert self.instance is not None, \
6028       "Cannot retrieve locked instance %s" % self.op.instance_name
6029
6030     _CheckNodeOnline(self, instance.primary_node)
6031
6032     # check bridges existence
6033     _CheckInstanceBridgesExist(self, instance)
6034
6035   def Exec(self, feedback_fn):
6036     """Reboot the instance.
6037
6038     """
6039     instance = self.instance
6040     ignore_secondaries = self.op.ignore_secondaries
6041     reboot_type = self.op.reboot_type
6042
6043     remote_info = self.rpc.call_instance_info(instance.primary_node,
6044                                               instance.name,
6045                                               instance.hypervisor)
6046     remote_info.Raise("Error checking node %s" % instance.primary_node)
6047     instance_running = bool(remote_info.payload)
6048
6049     node_current = instance.primary_node
6050
6051     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6052                                             constants.INSTANCE_REBOOT_HARD]:
6053       for disk in instance.disks:
6054         self.cfg.SetDiskID(disk, node_current)
6055       result = self.rpc.call_instance_reboot(node_current, instance,
6056                                              reboot_type,
6057                                              self.op.shutdown_timeout)
6058       result.Raise("Could not reboot instance")
6059     else:
6060       if instance_running:
6061         result = self.rpc.call_instance_shutdown(node_current, instance,
6062                                                  self.op.shutdown_timeout)
6063         result.Raise("Could not shutdown instance for full reboot")
6064         _ShutdownInstanceDisks(self, instance)
6065       else:
6066         self.LogInfo("Instance %s was already stopped, starting now",
6067                      instance.name)
6068       _StartInstanceDisks(self, instance, ignore_secondaries)
6069       result = self.rpc.call_instance_start(node_current, instance,
6070                                             None, None, False)
6071       msg = result.fail_msg
6072       if msg:
6073         _ShutdownInstanceDisks(self, instance)
6074         raise errors.OpExecError("Could not start instance for"
6075                                  " full reboot: %s" % msg)
6076
6077     self.cfg.MarkInstanceUp(instance.name)
6078
6079
6080 class LUInstanceShutdown(LogicalUnit):
6081   """Shutdown an instance.
6082
6083   """
6084   HPATH = "instance-stop"
6085   HTYPE = constants.HTYPE_INSTANCE
6086   REQ_BGL = False
6087
6088   def ExpandNames(self):
6089     self._ExpandAndLockInstance()
6090
6091   def BuildHooksEnv(self):
6092     """Build hooks env.
6093
6094     This runs on master, primary and secondary nodes of the instance.
6095
6096     """
6097     env = _BuildInstanceHookEnvByObject(self, self.instance)
6098     env["TIMEOUT"] = self.op.timeout
6099     return env
6100
6101   def BuildHooksNodes(self):
6102     """Build hooks nodes.
6103
6104     """
6105     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6106     return (nl, nl)
6107
6108   def CheckPrereq(self):
6109     """Check prerequisites.
6110
6111     This checks that the instance is in the cluster.
6112
6113     """
6114     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6115     assert self.instance is not None, \
6116       "Cannot retrieve locked instance %s" % self.op.instance_name
6117
6118     self.primary_offline = \
6119       self.cfg.GetNodeInfo(self.instance.primary_node).offline
6120
6121     if self.primary_offline and self.op.ignore_offline_nodes:
6122       self.proc.LogWarning("Ignoring offline primary node")
6123     else:
6124       _CheckNodeOnline(self, self.instance.primary_node)
6125
6126   def Exec(self, feedback_fn):
6127     """Shutdown the instance.
6128
6129     """
6130     instance = self.instance
6131     node_current = instance.primary_node
6132     timeout = self.op.timeout
6133
6134     if not self.op.no_remember:
6135       self.cfg.MarkInstanceDown(instance.name)
6136
6137     if self.primary_offline:
6138       assert self.op.ignore_offline_nodes
6139       self.proc.LogInfo("Primary node offline, marked instance as stopped")
6140     else:
6141       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6142       msg = result.fail_msg
6143       if msg:
6144         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6145
6146       _ShutdownInstanceDisks(self, instance)
6147
6148
6149 class LUInstanceReinstall(LogicalUnit):
6150   """Reinstall an instance.
6151
6152   """
6153   HPATH = "instance-reinstall"
6154   HTYPE = constants.HTYPE_INSTANCE
6155   REQ_BGL = False
6156
6157   def ExpandNames(self):
6158     self._ExpandAndLockInstance()
6159
6160   def BuildHooksEnv(self):
6161     """Build hooks env.
6162
6163     This runs on master, primary and secondary nodes of the instance.
6164
6165     """
6166     return _BuildInstanceHookEnvByObject(self, self.instance)
6167
6168   def BuildHooksNodes(self):
6169     """Build hooks nodes.
6170
6171     """
6172     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6173     return (nl, nl)
6174
6175   def CheckPrereq(self):
6176     """Check prerequisites.
6177
6178     This checks that the instance is in the cluster and is not running.
6179
6180     """
6181     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6182     assert instance is not None, \
6183       "Cannot retrieve locked instance %s" % self.op.instance_name
6184     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6185                      " offline, cannot reinstall")
6186     for node in instance.secondary_nodes:
6187       _CheckNodeOnline(self, node, "Instance secondary node offline,"
6188                        " cannot reinstall")
6189
6190     if instance.disk_template == constants.DT_DISKLESS:
6191       raise errors.OpPrereqError("Instance '%s' has no disks" %
6192                                  self.op.instance_name,
6193                                  errors.ECODE_INVAL)
6194     _CheckInstanceDown(self, instance, "cannot reinstall")
6195
6196     if self.op.os_type is not None:
6197       # OS verification
6198       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6199       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6200       instance_os = self.op.os_type
6201     else:
6202       instance_os = instance.os
6203
6204     nodelist = list(instance.all_nodes)
6205
6206     if self.op.osparams:
6207       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6208       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6209       self.os_inst = i_osdict # the new dict (without defaults)
6210     else:
6211       self.os_inst = None
6212
6213     self.instance = instance
6214
6215   def Exec(self, feedback_fn):
6216     """Reinstall the instance.
6217
6218     """
6219     inst = self.instance
6220
6221     if self.op.os_type is not None:
6222       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6223       inst.os = self.op.os_type
6224       # Write to configuration
6225       self.cfg.Update(inst, feedback_fn)
6226
6227     _StartInstanceDisks(self, inst, None)
6228     try:
6229       feedback_fn("Running the instance OS create scripts...")
6230       # FIXME: pass debug option from opcode to backend
6231       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6232                                              self.op.debug_level,
6233                                              osparams=self.os_inst)
6234       result.Raise("Could not install OS for instance %s on node %s" %
6235                    (inst.name, inst.primary_node))
6236     finally:
6237       _ShutdownInstanceDisks(self, inst)
6238
6239
6240 class LUInstanceRecreateDisks(LogicalUnit):
6241   """Recreate an instance's missing disks.
6242
6243   """
6244   HPATH = "instance-recreate-disks"
6245   HTYPE = constants.HTYPE_INSTANCE
6246   REQ_BGL = False
6247
6248   def CheckArguments(self):
6249     # normalise the disk list
6250     self.op.disks = sorted(frozenset(self.op.disks))
6251
6252   def ExpandNames(self):
6253     self._ExpandAndLockInstance()
6254     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6255     if self.op.nodes:
6256       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6257       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6258     else:
6259       self.needed_locks[locking.LEVEL_NODE] = []
6260
6261   def DeclareLocks(self, level):
6262     if level == locking.LEVEL_NODE:
6263       # if we replace the nodes, we only need to lock the old primary,
6264       # otherwise we need to lock all nodes for disk re-creation
6265       primary_only = bool(self.op.nodes)
6266       self._LockInstancesNodes(primary_only=primary_only)
6267
6268   def BuildHooksEnv(self):
6269     """Build hooks env.
6270
6271     This runs on master, primary and secondary nodes of the instance.
6272
6273     """
6274     return _BuildInstanceHookEnvByObject(self, self.instance)
6275
6276   def BuildHooksNodes(self):
6277     """Build hooks nodes.
6278
6279     """
6280     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6281     return (nl, nl)
6282
6283   def CheckPrereq(self):
6284     """Check prerequisites.
6285
6286     This checks that the instance is in the cluster and is not running.
6287
6288     """
6289     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6290     assert instance is not None, \
6291       "Cannot retrieve locked instance %s" % self.op.instance_name
6292     if self.op.nodes:
6293       if len(self.op.nodes) != len(instance.all_nodes):
6294         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6295                                    " %d replacement nodes were specified" %
6296                                    (instance.name, len(instance.all_nodes),
6297                                     len(self.op.nodes)),
6298                                    errors.ECODE_INVAL)
6299       assert instance.disk_template != constants.DT_DRBD8 or \
6300           len(self.op.nodes) == 2
6301       assert instance.disk_template != constants.DT_PLAIN or \
6302           len(self.op.nodes) == 1
6303       primary_node = self.op.nodes[0]
6304     else:
6305       primary_node = instance.primary_node
6306     _CheckNodeOnline(self, primary_node)
6307
6308     if instance.disk_template == constants.DT_DISKLESS:
6309       raise errors.OpPrereqError("Instance '%s' has no disks" %
6310                                  self.op.instance_name, errors.ECODE_INVAL)
6311     # if we replace nodes *and* the old primary is offline, we don't
6312     # check
6313     assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6314     old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6315     if not (self.op.nodes and old_pnode.offline):
6316       _CheckInstanceDown(self, instance, "cannot recreate disks")
6317
6318     if not self.op.disks:
6319       self.op.disks = range(len(instance.disks))
6320     else:
6321       for idx in self.op.disks:
6322         if idx >= len(instance.disks):
6323           raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6324                                      errors.ECODE_INVAL)
6325     if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6326       raise errors.OpPrereqError("Can't recreate disks partially and"
6327                                  " change the nodes at the same time",
6328                                  errors.ECODE_INVAL)
6329     self.instance = instance
6330
6331   def Exec(self, feedback_fn):
6332     """Recreate the disks.
6333
6334     """
6335     instance = self.instance
6336
6337     to_skip = []
6338     mods = [] # keeps track of needed logical_id changes
6339
6340     for idx, disk in enumerate(instance.disks):
6341       if idx not in self.op.disks: # disk idx has not been passed in
6342         to_skip.append(idx)
6343         continue
6344       # update secondaries for disks, if needed
6345       if self.op.nodes:
6346         if disk.dev_type == constants.LD_DRBD8:
6347           # need to update the nodes and minors
6348           assert len(self.op.nodes) == 2
6349           assert len(disk.logical_id) == 6 # otherwise disk internals
6350                                            # have changed
6351           (_, _, old_port, _, _, old_secret) = disk.logical_id
6352           new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6353           new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6354                     new_minors[0], new_minors[1], old_secret)
6355           assert len(disk.logical_id) == len(new_id)
6356           mods.append((idx, new_id))
6357
6358     # now that we have passed all asserts above, we can apply the mods
6359     # in a single run (to avoid partial changes)
6360     for idx, new_id in mods:
6361       instance.disks[idx].logical_id = new_id
6362
6363     # change primary node, if needed
6364     if self.op.nodes:
6365       instance.primary_node = self.op.nodes[0]
6366       self.LogWarning("Changing the instance's nodes, you will have to"
6367                       " remove any disks left on the older nodes manually")
6368
6369     if self.op.nodes:
6370       self.cfg.Update(instance, feedback_fn)
6371
6372     _CreateDisks(self, instance, to_skip=to_skip)
6373
6374
6375 class LUInstanceRename(LogicalUnit):
6376   """Rename an instance.
6377
6378   """
6379   HPATH = "instance-rename"
6380   HTYPE = constants.HTYPE_INSTANCE
6381
6382   def CheckArguments(self):
6383     """Check arguments.
6384
6385     """
6386     if self.op.ip_check and not self.op.name_check:
6387       # TODO: make the ip check more flexible and not depend on the name check
6388       raise errors.OpPrereqError("IP address check requires a name check",
6389                                  errors.ECODE_INVAL)
6390
6391   def BuildHooksEnv(self):
6392     """Build hooks env.
6393
6394     This runs on master, primary and secondary nodes of the instance.
6395
6396     """
6397     env = _BuildInstanceHookEnvByObject(self, self.instance)
6398     env["INSTANCE_NEW_NAME"] = self.op.new_name
6399     return env
6400
6401   def BuildHooksNodes(self):
6402     """Build hooks nodes.
6403
6404     """
6405     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6406     return (nl, nl)
6407
6408   def CheckPrereq(self):
6409     """Check prerequisites.
6410
6411     This checks that the instance is in the cluster and is not running.
6412
6413     """
6414     self.op.instance_name = _ExpandInstanceName(self.cfg,
6415                                                 self.op.instance_name)
6416     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6417     assert instance is not None
6418     _CheckNodeOnline(self, instance.primary_node)
6419     _CheckInstanceDown(self, instance, "cannot rename")
6420     self.instance = instance
6421
6422     new_name = self.op.new_name
6423     if self.op.name_check:
6424       hostname = netutils.GetHostname(name=new_name)
6425       if hostname != new_name:
6426         self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6427                      hostname.name)
6428       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6429         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6430                                     " same as given hostname '%s'") %
6431                                     (hostname.name, self.op.new_name),
6432                                     errors.ECODE_INVAL)
6433       new_name = self.op.new_name = hostname.name
6434       if (self.op.ip_check and
6435           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6436         raise errors.OpPrereqError("IP %s of instance %s already in use" %
6437                                    (hostname.ip, new_name),
6438                                    errors.ECODE_NOTUNIQUE)
6439
6440     instance_list = self.cfg.GetInstanceList()
6441     if new_name in instance_list and new_name != instance.name:
6442       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6443                                  new_name, errors.ECODE_EXISTS)
6444
6445   def Exec(self, feedback_fn):
6446     """Rename the instance.
6447
6448     """
6449     inst = self.instance
6450     old_name = inst.name
6451
6452     rename_file_storage = False
6453     if (inst.disk_template in constants.DTS_FILEBASED and
6454         self.op.new_name != inst.name):
6455       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6456       rename_file_storage = True
6457
6458     self.cfg.RenameInstance(inst.name, self.op.new_name)
6459     # Change the instance lock. This is definitely safe while we hold the BGL.
6460     # Otherwise the new lock would have to be added in acquired mode.
6461     assert self.REQ_BGL
6462     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6463     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6464
6465     # re-read the instance from the configuration after rename
6466     inst = self.cfg.GetInstanceInfo(self.op.new_name)
6467
6468     if rename_file_storage:
6469       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6470       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6471                                                      old_file_storage_dir,
6472                                                      new_file_storage_dir)
6473       result.Raise("Could not rename on node %s directory '%s' to '%s'"
6474                    " (but the instance has been renamed in Ganeti)" %
6475                    (inst.primary_node, old_file_storage_dir,
6476                     new_file_storage_dir))
6477
6478     _StartInstanceDisks(self, inst, None)
6479     try:
6480       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6481                                                  old_name, self.op.debug_level)
6482       msg = result.fail_msg
6483       if msg:
6484         msg = ("Could not run OS rename script for instance %s on node %s"
6485                " (but the instance has been renamed in Ganeti): %s" %
6486                (inst.name, inst.primary_node, msg))
6487         self.proc.LogWarning(msg)
6488     finally:
6489       _ShutdownInstanceDisks(self, inst)
6490
6491     return inst.name
6492
6493
6494 class LUInstanceRemove(LogicalUnit):
6495   """Remove an instance.
6496
6497   """
6498   HPATH = "instance-remove"
6499   HTYPE = constants.HTYPE_INSTANCE
6500   REQ_BGL = False
6501
6502   def ExpandNames(self):
6503     self._ExpandAndLockInstance()
6504     self.needed_locks[locking.LEVEL_NODE] = []
6505     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6506
6507   def DeclareLocks(self, level):
6508     if level == locking.LEVEL_NODE:
6509       self._LockInstancesNodes()
6510
6511   def BuildHooksEnv(self):
6512     """Build hooks env.
6513
6514     This runs on master, primary and secondary nodes of the instance.
6515
6516     """
6517     env = _BuildInstanceHookEnvByObject(self, self.instance)
6518     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6519     return env
6520
6521   def BuildHooksNodes(self):
6522     """Build hooks nodes.
6523
6524     """
6525     nl = [self.cfg.GetMasterNode()]
6526     nl_post = list(self.instance.all_nodes) + nl
6527     return (nl, nl_post)
6528
6529   def CheckPrereq(self):
6530     """Check prerequisites.
6531
6532     This checks that the instance is in the cluster.
6533
6534     """
6535     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6536     assert self.instance is not None, \
6537       "Cannot retrieve locked instance %s" % self.op.instance_name
6538
6539   def Exec(self, feedback_fn):
6540     """Remove the instance.
6541
6542     """
6543     instance = self.instance
6544     logging.info("Shutting down instance %s on node %s",
6545                  instance.name, instance.primary_node)
6546
6547     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6548                                              self.op.shutdown_timeout)
6549     msg = result.fail_msg
6550     if msg:
6551       if self.op.ignore_failures:
6552         feedback_fn("Warning: can't shutdown instance: %s" % msg)
6553       else:
6554         raise errors.OpExecError("Could not shutdown instance %s on"
6555                                  " node %s: %s" %
6556                                  (instance.name, instance.primary_node, msg))
6557
6558     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6559
6560
6561 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6562   """Utility function to remove an instance.
6563
6564   """
6565   logging.info("Removing block devices for instance %s", instance.name)
6566
6567   if not _RemoveDisks(lu, instance):
6568     if not ignore_failures:
6569       raise errors.OpExecError("Can't remove instance's disks")
6570     feedback_fn("Warning: can't remove instance's disks")
6571
6572   logging.info("Removing instance %s out of cluster config", instance.name)
6573
6574   lu.cfg.RemoveInstance(instance.name)
6575
6576   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6577     "Instance lock removal conflict"
6578
6579   # Remove lock for the instance
6580   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6581
6582
6583 class LUInstanceQuery(NoHooksLU):
6584   """Logical unit for querying instances.
6585
6586   """
6587   # pylint: disable=W0142
6588   REQ_BGL = False
6589
6590   def CheckArguments(self):
6591     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6592                              self.op.output_fields, self.op.use_locking)
6593
6594   def ExpandNames(self):
6595     self.iq.ExpandNames(self)
6596
6597   def DeclareLocks(self, level):
6598     self.iq.DeclareLocks(self, level)
6599
6600   def Exec(self, feedback_fn):
6601     return self.iq.OldStyleQuery(self)
6602
6603
6604 class LUInstanceFailover(LogicalUnit):
6605   """Failover an instance.
6606
6607   """
6608   HPATH = "instance-failover"
6609   HTYPE = constants.HTYPE_INSTANCE
6610   REQ_BGL = False
6611
6612   def CheckArguments(self):
6613     """Check the arguments.
6614
6615     """
6616     self.iallocator = getattr(self.op, "iallocator", None)
6617     self.target_node = getattr(self.op, "target_node", None)
6618
6619   def ExpandNames(self):
6620     self._ExpandAndLockInstance()
6621
6622     if self.op.target_node is not None:
6623       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6624
6625     self.needed_locks[locking.LEVEL_NODE] = []
6626     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6627
6628     ignore_consistency = self.op.ignore_consistency
6629     shutdown_timeout = self.op.shutdown_timeout
6630     self._migrater = TLMigrateInstance(self, self.op.instance_name,
6631                                        cleanup=False,
6632                                        failover=True,
6633                                        ignore_consistency=ignore_consistency,
6634                                        shutdown_timeout=shutdown_timeout)
6635     self.tasklets = [self._migrater]
6636
6637   def DeclareLocks(self, level):
6638     if level == locking.LEVEL_NODE:
6639       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6640       if instance.disk_template in constants.DTS_EXT_MIRROR:
6641         if self.op.target_node is None:
6642           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6643         else:
6644           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6645                                                    self.op.target_node]
6646         del self.recalculate_locks[locking.LEVEL_NODE]
6647       else:
6648         self._LockInstancesNodes()
6649
6650   def BuildHooksEnv(self):
6651     """Build hooks env.
6652
6653     This runs on master, primary and secondary nodes of the instance.
6654
6655     """
6656     instance = self._migrater.instance
6657     source_node = instance.primary_node
6658     target_node = self.op.target_node
6659     env = {
6660       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6661       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6662       "OLD_PRIMARY": source_node,
6663       "NEW_PRIMARY": target_node,
6664       }
6665
6666     if instance.disk_template in constants.DTS_INT_MIRROR:
6667       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6668       env["NEW_SECONDARY"] = source_node
6669     else:
6670       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6671
6672     env.update(_BuildInstanceHookEnvByObject(self, instance))
6673
6674     return env
6675
6676   def BuildHooksNodes(self):
6677     """Build hooks nodes.
6678
6679     """
6680     instance = self._migrater.instance
6681     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6682     return (nl, nl + [instance.primary_node])
6683
6684
6685 class LUInstanceMigrate(LogicalUnit):
6686   """Migrate an instance.
6687
6688   This is migration without shutting down, compared to the failover,
6689   which is done with shutdown.
6690
6691   """
6692   HPATH = "instance-migrate"
6693   HTYPE = constants.HTYPE_INSTANCE
6694   REQ_BGL = False
6695
6696   def ExpandNames(self):
6697     self._ExpandAndLockInstance()
6698
6699     if self.op.target_node is not None:
6700       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6701
6702     self.needed_locks[locking.LEVEL_NODE] = []
6703     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6704
6705     self._migrater = TLMigrateInstance(self, self.op.instance_name,
6706                                        cleanup=self.op.cleanup,
6707                                        failover=False,
6708                                        fallback=self.op.allow_failover)
6709     self.tasklets = [self._migrater]
6710
6711   def DeclareLocks(self, level):
6712     if level == locking.LEVEL_NODE:
6713       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6714       if instance.disk_template in constants.DTS_EXT_MIRROR:
6715         if self.op.target_node is None:
6716           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6717         else:
6718           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6719                                                    self.op.target_node]
6720         del self.recalculate_locks[locking.LEVEL_NODE]
6721       else:
6722         self._LockInstancesNodes()
6723
6724   def BuildHooksEnv(self):
6725     """Build hooks env.
6726
6727     This runs on master, primary and secondary nodes of the instance.
6728
6729     """
6730     instance = self._migrater.instance
6731     source_node = instance.primary_node
6732     target_node = self.op.target_node
6733     env = _BuildInstanceHookEnvByObject(self, instance)
6734     env.update({
6735       "MIGRATE_LIVE": self._migrater.live,
6736       "MIGRATE_CLEANUP": self.op.cleanup,
6737       "OLD_PRIMARY": source_node,
6738       "NEW_PRIMARY": target_node,
6739       })
6740
6741     if instance.disk_template in constants.DTS_INT_MIRROR:
6742       env["OLD_SECONDARY"] = target_node
6743       env["NEW_SECONDARY"] = source_node
6744     else:
6745       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6746
6747     return env
6748
6749   def BuildHooksNodes(self):
6750     """Build hooks nodes.
6751
6752     """
6753     instance = self._migrater.instance
6754     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6755     return (nl, nl + [instance.primary_node])
6756
6757
6758 class LUInstanceMove(LogicalUnit):
6759   """Move an instance by data-copying.
6760
6761   """
6762   HPATH = "instance-move"
6763   HTYPE = constants.HTYPE_INSTANCE
6764   REQ_BGL = False
6765
6766   def ExpandNames(self):
6767     self._ExpandAndLockInstance()
6768     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6769     self.op.target_node = target_node
6770     self.needed_locks[locking.LEVEL_NODE] = [target_node]
6771     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6772
6773   def DeclareLocks(self, level):
6774     if level == locking.LEVEL_NODE:
6775       self._LockInstancesNodes(primary_only=True)
6776
6777   def BuildHooksEnv(self):
6778     """Build hooks env.
6779
6780     This runs on master, primary and secondary nodes of the instance.
6781
6782     """
6783     env = {
6784       "TARGET_NODE": self.op.target_node,
6785       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6786       }
6787     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6788     return env
6789
6790   def BuildHooksNodes(self):
6791     """Build hooks nodes.
6792
6793     """
6794     nl = [
6795       self.cfg.GetMasterNode(),
6796       self.instance.primary_node,
6797       self.op.target_node,
6798       ]
6799     return (nl, nl)
6800
6801   def CheckPrereq(self):
6802     """Check prerequisites.
6803
6804     This checks that the instance is in the cluster.
6805
6806     """
6807     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6808     assert self.instance is not None, \
6809       "Cannot retrieve locked instance %s" % self.op.instance_name
6810
6811     node = self.cfg.GetNodeInfo(self.op.target_node)
6812     assert node is not None, \
6813       "Cannot retrieve locked node %s" % self.op.target_node
6814
6815     self.target_node = target_node = node.name
6816
6817     if target_node == instance.primary_node:
6818       raise errors.OpPrereqError("Instance %s is already on the node %s" %
6819                                  (instance.name, target_node),
6820                                  errors.ECODE_STATE)
6821
6822     bep = self.cfg.GetClusterInfo().FillBE(instance)
6823
6824     for idx, dsk in enumerate(instance.disks):
6825       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6826         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6827                                    " cannot copy" % idx, errors.ECODE_STATE)
6828
6829     _CheckNodeOnline(self, target_node)
6830     _CheckNodeNotDrained(self, target_node)
6831     _CheckNodeVmCapable(self, target_node)
6832
6833     if instance.admin_up:
6834       # check memory requirements on the secondary node
6835       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6836                            instance.name, bep[constants.BE_MEMORY],
6837                            instance.hypervisor)
6838     else:
6839       self.LogInfo("Not checking memory on the secondary node as"
6840                    " instance will not be started")
6841
6842     # check bridge existance
6843     _CheckInstanceBridgesExist(self, instance, node=target_node)
6844
6845   def Exec(self, feedback_fn):
6846     """Move an instance.
6847
6848     The move is done by shutting it down on its present node, copying
6849     the data over (slow) and starting it on the new node.
6850
6851     """
6852     instance = self.instance
6853
6854     source_node = instance.primary_node
6855     target_node = self.target_node
6856
6857     self.LogInfo("Shutting down instance %s on source node %s",
6858                  instance.name, source_node)
6859
6860     result = self.rpc.call_instance_shutdown(source_node, instance,
6861                                              self.op.shutdown_timeout)
6862     msg = result.fail_msg
6863     if msg:
6864       if self.op.ignore_consistency:
6865         self.proc.LogWarning("Could not shutdown instance %s on node %s."
6866                              " Proceeding anyway. Please make sure node"
6867                              " %s is down. Error details: %s",
6868                              instance.name, source_node, source_node, msg)
6869       else:
6870         raise errors.OpExecError("Could not shutdown instance %s on"
6871                                  " node %s: %s" %
6872                                  (instance.name, source_node, msg))
6873
6874     # create the target disks
6875     try:
6876       _CreateDisks(self, instance, target_node=target_node)
6877     except errors.OpExecError:
6878       self.LogWarning("Device creation failed, reverting...")
6879       try:
6880         _RemoveDisks(self, instance, target_node=target_node)
6881       finally:
6882         self.cfg.ReleaseDRBDMinors(instance.name)
6883         raise
6884
6885     cluster_name = self.cfg.GetClusterInfo().cluster_name
6886
6887     errs = []
6888     # activate, get path, copy the data over
6889     for idx, disk in enumerate(instance.disks):
6890       self.LogInfo("Copying data for disk %d", idx)
6891       result = self.rpc.call_blockdev_assemble(target_node, disk,
6892                                                instance.name, True, idx)
6893       if result.fail_msg:
6894         self.LogWarning("Can't assemble newly created disk %d: %s",
6895                         idx, result.fail_msg)
6896         errs.append(result.fail_msg)
6897         break
6898       dev_path = result.payload
6899       result = self.rpc.call_blockdev_export(source_node, disk,
6900                                              target_node, dev_path,
6901                                              cluster_name)
6902       if result.fail_msg:
6903         self.LogWarning("Can't copy data over for disk %d: %s",
6904                         idx, result.fail_msg)
6905         errs.append(result.fail_msg)
6906         break
6907
6908     if errs:
6909       self.LogWarning("Some disks failed to copy, aborting")
6910       try:
6911         _RemoveDisks(self, instance, target_node=target_node)
6912       finally:
6913         self.cfg.ReleaseDRBDMinors(instance.name)
6914         raise errors.OpExecError("Errors during disk copy: %s" %
6915                                  (",".join(errs),))
6916
6917     instance.primary_node = target_node
6918     self.cfg.Update(instance, feedback_fn)
6919
6920     self.LogInfo("Removing the disks on the original node")
6921     _RemoveDisks(self, instance, target_node=source_node)
6922
6923     # Only start the instance if it's marked as up
6924     if instance.admin_up:
6925       self.LogInfo("Starting instance %s on node %s",
6926                    instance.name, target_node)
6927
6928       disks_ok, _ = _AssembleInstanceDisks(self, instance,
6929                                            ignore_secondaries=True)
6930       if not disks_ok:
6931         _ShutdownInstanceDisks(self, instance)
6932         raise errors.OpExecError("Can't activate the instance's disks")
6933
6934       result = self.rpc.call_instance_start(target_node, instance,
6935                                             None, None, False)
6936       msg = result.fail_msg
6937       if msg:
6938         _ShutdownInstanceDisks(self, instance)
6939         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6940                                  (instance.name, target_node, msg))
6941
6942
6943 class LUNodeMigrate(LogicalUnit):
6944   """Migrate all instances from a node.
6945
6946   """
6947   HPATH = "node-migrate"
6948   HTYPE = constants.HTYPE_NODE
6949   REQ_BGL = False
6950
6951   def CheckArguments(self):
6952     pass
6953
6954   def ExpandNames(self):
6955     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6956
6957     self.share_locks = _ShareAll()
6958     self.needed_locks = {
6959       locking.LEVEL_NODE: [self.op.node_name],
6960       }
6961
6962   def BuildHooksEnv(self):
6963     """Build hooks env.
6964
6965     This runs on the master, the primary and all the secondaries.
6966
6967     """
6968     return {
6969       "NODE_NAME": self.op.node_name,
6970       }
6971
6972   def BuildHooksNodes(self):
6973     """Build hooks nodes.
6974
6975     """
6976     nl = [self.cfg.GetMasterNode()]
6977     return (nl, nl)
6978
6979   def CheckPrereq(self):
6980     pass
6981
6982   def Exec(self, feedback_fn):
6983     # Prepare jobs for migration instances
6984     jobs = [
6985       [opcodes.OpInstanceMigrate(instance_name=inst.name,
6986                                  mode=self.op.mode,
6987                                  live=self.op.live,
6988                                  iallocator=self.op.iallocator,
6989                                  target_node=self.op.target_node)]
6990       for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
6991       ]
6992
6993     # TODO: Run iallocator in this opcode and pass correct placement options to
6994     # OpInstanceMigrate. Since other jobs can modify the cluster between
6995     # running the iallocator and the actual migration, a good consistency model
6996     # will have to be found.
6997
6998     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
6999             frozenset([self.op.node_name]))
7000
7001     return ResultWithJobs(jobs)
7002
7003
7004 class TLMigrateInstance(Tasklet):
7005   """Tasklet class for instance migration.
7006
7007   @type live: boolean
7008   @ivar live: whether the migration will be done live or non-live;
7009       this variable is initalized only after CheckPrereq has run
7010   @type cleanup: boolean
7011   @ivar cleanup: Wheater we cleanup from a failed migration
7012   @type iallocator: string
7013   @ivar iallocator: The iallocator used to determine target_node
7014   @type target_node: string
7015   @ivar target_node: If given, the target_node to reallocate the instance to
7016   @type failover: boolean
7017   @ivar failover: Whether operation results in failover or migration
7018   @type fallback: boolean
7019   @ivar fallback: Whether fallback to failover is allowed if migration not
7020                   possible
7021   @type ignore_consistency: boolean
7022   @ivar ignore_consistency: Wheter we should ignore consistency between source
7023                             and target node
7024   @type shutdown_timeout: int
7025   @ivar shutdown_timeout: In case of failover timeout of the shutdown
7026
7027   """
7028   def __init__(self, lu, instance_name, cleanup=False,
7029                failover=False, fallback=False,
7030                ignore_consistency=False,
7031                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7032     """Initializes this class.
7033
7034     """
7035     Tasklet.__init__(self, lu)
7036
7037     # Parameters
7038     self.instance_name = instance_name
7039     self.cleanup = cleanup
7040     self.live = False # will be overridden later
7041     self.failover = failover
7042     self.fallback = fallback
7043     self.ignore_consistency = ignore_consistency
7044     self.shutdown_timeout = shutdown_timeout
7045
7046   def CheckPrereq(self):
7047     """Check prerequisites.
7048
7049     This checks that the instance is in the cluster.
7050
7051     """
7052     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7053     instance = self.cfg.GetInstanceInfo(instance_name)
7054     assert instance is not None
7055     self.instance = instance
7056
7057     if (not self.cleanup and not instance.admin_up and not self.failover and
7058         self.fallback):
7059       self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7060                       " to failover")
7061       self.failover = True
7062
7063     if instance.disk_template not in constants.DTS_MIRRORED:
7064       if self.failover:
7065         text = "failovers"
7066       else:
7067         text = "migrations"
7068       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7069                                  " %s" % (instance.disk_template, text),
7070                                  errors.ECODE_STATE)
7071
7072     if instance.disk_template in constants.DTS_EXT_MIRROR:
7073       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7074
7075       if self.lu.op.iallocator:
7076         self._RunAllocator()
7077       else:
7078         # We set set self.target_node as it is required by
7079         # BuildHooksEnv
7080         self.target_node = self.lu.op.target_node
7081
7082       # self.target_node is already populated, either directly or by the
7083       # iallocator run
7084       target_node = self.target_node
7085       if self.target_node == instance.primary_node:
7086         raise errors.OpPrereqError("Cannot migrate instance %s"
7087                                    " to its primary (%s)" %
7088                                    (instance.name, instance.primary_node))
7089
7090       if len(self.lu.tasklets) == 1:
7091         # It is safe to release locks only when we're the only tasklet
7092         # in the LU
7093         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7094                       keep=[instance.primary_node, self.target_node])
7095
7096     else:
7097       secondary_nodes = instance.secondary_nodes
7098       if not secondary_nodes:
7099         raise errors.ConfigurationError("No secondary node but using"
7100                                         " %s disk template" %
7101                                         instance.disk_template)
7102       target_node = secondary_nodes[0]
7103       if self.lu.op.iallocator or (self.lu.op.target_node and
7104                                    self.lu.op.target_node != target_node):
7105         if self.failover:
7106           text = "failed over"
7107         else:
7108           text = "migrated"
7109         raise errors.OpPrereqError("Instances with disk template %s cannot"
7110                                    " be %s to arbitrary nodes"
7111                                    " (neither an iallocator nor a target"
7112                                    " node can be passed)" %
7113                                    (instance.disk_template, text),
7114                                    errors.ECODE_INVAL)
7115
7116     i_be = self.cfg.GetClusterInfo().FillBE(instance)
7117
7118     # check memory requirements on the secondary node
7119     if not self.failover or instance.admin_up:
7120       _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7121                            instance.name, i_be[constants.BE_MEMORY],
7122                            instance.hypervisor)
7123     else:
7124       self.lu.LogInfo("Not checking memory on the secondary node as"
7125                       " instance will not be started")
7126
7127     # check bridge existance
7128     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7129
7130     if not self.cleanup:
7131       _CheckNodeNotDrained(self.lu, target_node)
7132       if not self.failover:
7133         result = self.rpc.call_instance_migratable(instance.primary_node,
7134                                                    instance)
7135         if result.fail_msg and self.fallback:
7136           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7137                           " failover")
7138           self.failover = True
7139         else:
7140           result.Raise("Can't migrate, please use failover",
7141                        prereq=True, ecode=errors.ECODE_STATE)
7142
7143     assert not (self.failover and self.cleanup)
7144
7145     if not self.failover:
7146       if self.lu.op.live is not None and self.lu.op.mode is not None:
7147         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7148                                    " parameters are accepted",
7149                                    errors.ECODE_INVAL)
7150       if self.lu.op.live is not None:
7151         if self.lu.op.live:
7152           self.lu.op.mode = constants.HT_MIGRATION_LIVE
7153         else:
7154           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7155         # reset the 'live' parameter to None so that repeated
7156         # invocations of CheckPrereq do not raise an exception
7157         self.lu.op.live = None
7158       elif self.lu.op.mode is None:
7159         # read the default value from the hypervisor
7160         i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7161                                                 skip_globals=False)
7162         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7163
7164       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7165     else:
7166       # Failover is never live
7167       self.live = False
7168
7169   def _RunAllocator(self):
7170     """Run the allocator based on input opcode.
7171
7172     """
7173     ial = IAllocator(self.cfg, self.rpc,
7174                      mode=constants.IALLOCATOR_MODE_RELOC,
7175                      name=self.instance_name,
7176                      # TODO See why hail breaks with a single node below
7177                      relocate_from=[self.instance.primary_node,
7178                                     self.instance.primary_node],
7179                      )
7180
7181     ial.Run(self.lu.op.iallocator)
7182
7183     if not ial.success:
7184       raise errors.OpPrereqError("Can't compute nodes using"
7185                                  " iallocator '%s': %s" %
7186                                  (self.lu.op.iallocator, ial.info),
7187                                  errors.ECODE_NORES)
7188     if len(ial.result) != ial.required_nodes:
7189       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7190                                  " of nodes (%s), required %s" %
7191                                  (self.lu.op.iallocator, len(ial.result),
7192                                   ial.required_nodes), errors.ECODE_FAULT)
7193     self.target_node = ial.result[0]
7194     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7195                  self.instance_name, self.lu.op.iallocator,
7196                  utils.CommaJoin(ial.result))
7197
7198   def _WaitUntilSync(self):
7199     """Poll with custom rpc for disk sync.
7200
7201     This uses our own step-based rpc call.
7202
7203     """
7204     self.feedback_fn("* wait until resync is done")
7205     all_done = False
7206     while not all_done:
7207       all_done = True
7208       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7209                                             self.nodes_ip,
7210                                             self.instance.disks)
7211       min_percent = 100
7212       for node, nres in result.items():
7213         nres.Raise("Cannot resync disks on node %s" % node)
7214         node_done, node_percent = nres.payload
7215         all_done = all_done and node_done
7216         if node_percent is not None:
7217           min_percent = min(min_percent, node_percent)
7218       if not all_done:
7219         if min_percent < 100:
7220           self.feedback_fn("   - progress: %.1f%%" % min_percent)
7221         time.sleep(2)
7222
7223   def _EnsureSecondary(self, node):
7224     """Demote a node to secondary.
7225
7226     """
7227     self.feedback_fn("* switching node %s to secondary mode" % node)
7228
7229     for dev in self.instance.disks:
7230       self.cfg.SetDiskID(dev, node)
7231
7232     result = self.rpc.call_blockdev_close(node, self.instance.name,
7233                                           self.instance.disks)
7234     result.Raise("Cannot change disk to secondary on node %s" % node)
7235
7236   def _GoStandalone(self):
7237     """Disconnect from the network.
7238
7239     """
7240     self.feedback_fn("* changing into standalone mode")
7241     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7242                                                self.instance.disks)
7243     for node, nres in result.items():
7244       nres.Raise("Cannot disconnect disks node %s" % node)
7245
7246   def _GoReconnect(self, multimaster):
7247     """Reconnect to the network.
7248
7249     """
7250     if multimaster:
7251       msg = "dual-master"
7252     else:
7253       msg = "single-master"
7254     self.feedback_fn("* changing disks into %s mode" % msg)
7255     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7256                                            self.instance.disks,
7257                                            self.instance.name, multimaster)
7258     for node, nres in result.items():
7259       nres.Raise("Cannot change disks config on node %s" % node)
7260
7261   def _ExecCleanup(self):
7262     """Try to cleanup after a failed migration.
7263
7264     The cleanup is done by:
7265       - check that the instance is running only on one node
7266         (and update the config if needed)
7267       - change disks on its secondary node to secondary
7268       - wait until disks are fully synchronized
7269       - disconnect from the network
7270       - change disks into single-master mode
7271       - wait again until disks are fully synchronized
7272
7273     """
7274     instance = self.instance
7275     target_node = self.target_node
7276     source_node = self.source_node
7277
7278     # check running on only one node
7279     self.feedback_fn("* checking where the instance actually runs"
7280                      " (if this hangs, the hypervisor might be in"
7281                      " a bad state)")
7282     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7283     for node, result in ins_l.items():
7284       result.Raise("Can't contact node %s" % node)
7285
7286     runningon_source = instance.name in ins_l[source_node].payload
7287     runningon_target = instance.name in ins_l[target_node].payload
7288
7289     if runningon_source and runningon_target:
7290       raise errors.OpExecError("Instance seems to be running on two nodes,"
7291                                " or the hypervisor is confused; you will have"
7292                                " to ensure manually that it runs only on one"
7293                                " and restart this operation")
7294
7295     if not (runningon_source or runningon_target):
7296       raise errors.OpExecError("Instance does not seem to be running at all;"
7297                                " in this case it's safer to repair by"
7298                                " running 'gnt-instance stop' to ensure disk"
7299                                " shutdown, and then restarting it")
7300
7301     if runningon_target:
7302       # the migration has actually succeeded, we need to update the config
7303       self.feedback_fn("* instance running on secondary node (%s),"
7304                        " updating config" % target_node)
7305       instance.primary_node = target_node
7306       self.cfg.Update(instance, self.feedback_fn)
7307       demoted_node = source_node
7308     else:
7309       self.feedback_fn("* instance confirmed to be running on its"
7310                        " primary node (%s)" % source_node)
7311       demoted_node = target_node
7312
7313     if instance.disk_template in constants.DTS_INT_MIRROR:
7314       self._EnsureSecondary(demoted_node)
7315       try:
7316         self._WaitUntilSync()
7317       except errors.OpExecError:
7318         # we ignore here errors, since if the device is standalone, it
7319         # won't be able to sync
7320         pass
7321       self._GoStandalone()
7322       self._GoReconnect(False)
7323       self._WaitUntilSync()
7324
7325     self.feedback_fn("* done")
7326
7327   def _RevertDiskStatus(self):
7328     """Try to revert the disk status after a failed migration.
7329
7330     """
7331     target_node = self.target_node
7332     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7333       return
7334
7335     try:
7336       self._EnsureSecondary(target_node)
7337       self._GoStandalone()
7338       self._GoReconnect(False)
7339       self._WaitUntilSync()
7340     except errors.OpExecError, err:
7341       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7342                          " please try to recover the instance manually;"
7343                          " error '%s'" % str(err))
7344
7345   def _AbortMigration(self):
7346     """Call the hypervisor code to abort a started migration.
7347
7348     """
7349     instance = self.instance
7350     target_node = self.target_node
7351     migration_info = self.migration_info
7352
7353     abort_result = self.rpc.call_finalize_migration(target_node,
7354                                                     instance,
7355                                                     migration_info,
7356                                                     False)
7357     abort_msg = abort_result.fail_msg
7358     if abort_msg:
7359       logging.error("Aborting migration failed on target node %s: %s",
7360                     target_node, abort_msg)
7361       # Don't raise an exception here, as we stil have to try to revert the
7362       # disk status, even if this step failed.
7363
7364   def _ExecMigration(self):
7365     """Migrate an instance.
7366
7367     The migrate is done by:
7368       - change the disks into dual-master mode
7369       - wait until disks are fully synchronized again
7370       - migrate the instance
7371       - change disks on the new secondary node (the old primary) to secondary
7372       - wait until disks are fully synchronized
7373       - change disks into single-master mode
7374
7375     """
7376     instance = self.instance
7377     target_node = self.target_node
7378     source_node = self.source_node
7379
7380     self.feedback_fn("* checking disk consistency between source and target")
7381     for dev in instance.disks:
7382       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7383         raise errors.OpExecError("Disk %s is degraded or not fully"
7384                                  " synchronized on target node,"
7385                                  " aborting migration" % dev.iv_name)
7386
7387     # First get the migration information from the remote node
7388     result = self.rpc.call_migration_info(source_node, instance)
7389     msg = result.fail_msg
7390     if msg:
7391       log_err = ("Failed fetching source migration information from %s: %s" %
7392                  (source_node, msg))
7393       logging.error(log_err)
7394       raise errors.OpExecError(log_err)
7395
7396     self.migration_info = migration_info = result.payload
7397
7398     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7399       # Then switch the disks to master/master mode
7400       self._EnsureSecondary(target_node)
7401       self._GoStandalone()
7402       self._GoReconnect(True)
7403       self._WaitUntilSync()
7404
7405     self.feedback_fn("* preparing %s to accept the instance" % target_node)
7406     result = self.rpc.call_accept_instance(target_node,
7407                                            instance,
7408                                            migration_info,
7409                                            self.nodes_ip[target_node])
7410
7411     msg = result.fail_msg
7412     if msg:
7413       logging.error("Instance pre-migration failed, trying to revert"
7414                     " disk status: %s", msg)
7415       self.feedback_fn("Pre-migration failed, aborting")
7416       self._AbortMigration()
7417       self._RevertDiskStatus()
7418       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7419                                (instance.name, msg))
7420
7421     self.feedback_fn("* migrating instance to %s" % target_node)
7422     result = self.rpc.call_instance_migrate(source_node, instance,
7423                                             self.nodes_ip[target_node],
7424                                             self.live)
7425     msg = result.fail_msg
7426     if msg:
7427       logging.error("Instance migration failed, trying to revert"
7428                     " disk status: %s", msg)
7429       self.feedback_fn("Migration failed, aborting")
7430       self._AbortMigration()
7431       self._RevertDiskStatus()
7432       raise errors.OpExecError("Could not migrate instance %s: %s" %
7433                                (instance.name, msg))
7434
7435     instance.primary_node = target_node
7436     # distribute new instance config to the other nodes
7437     self.cfg.Update(instance, self.feedback_fn)
7438
7439     result = self.rpc.call_finalize_migration(target_node,
7440                                               instance,
7441                                               migration_info,
7442                                               True)
7443     msg = result.fail_msg
7444     if msg:
7445       logging.error("Instance migration succeeded, but finalization failed:"
7446                     " %s", msg)
7447       raise errors.OpExecError("Could not finalize instance migration: %s" %
7448                                msg)
7449
7450     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7451       self._EnsureSecondary(source_node)
7452       self._WaitUntilSync()
7453       self._GoStandalone()
7454       self._GoReconnect(False)
7455       self._WaitUntilSync()
7456
7457     self.feedback_fn("* done")
7458
7459   def _ExecFailover(self):
7460     """Failover an instance.
7461
7462     The failover is done by shutting it down on its present node and
7463     starting it on the secondary.
7464
7465     """
7466     instance = self.instance
7467     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7468
7469     source_node = instance.primary_node
7470     target_node = self.target_node
7471
7472     if instance.admin_up:
7473       self.feedback_fn("* checking disk consistency between source and target")
7474       for dev in instance.disks:
7475         # for drbd, these are drbd over lvm
7476         if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7477           if primary_node.offline:
7478             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7479                              " target node %s" %
7480                              (primary_node.name, dev.iv_name, target_node))
7481           elif not self.ignore_consistency:
7482             raise errors.OpExecError("Disk %s is degraded on target node,"
7483                                      " aborting failover" % dev.iv_name)
7484     else:
7485       self.feedback_fn("* not checking disk consistency as instance is not"
7486                        " running")
7487
7488     self.feedback_fn("* shutting down instance on source node")
7489     logging.info("Shutting down instance %s on node %s",
7490                  instance.name, source_node)
7491
7492     result = self.rpc.call_instance_shutdown(source_node, instance,
7493                                              self.shutdown_timeout)
7494     msg = result.fail_msg
7495     if msg:
7496       if self.ignore_consistency or primary_node.offline:
7497         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7498                            " proceeding anyway; please make sure node"
7499                            " %s is down; error details: %s",
7500                            instance.name, source_node, source_node, msg)
7501       else:
7502         raise errors.OpExecError("Could not shutdown instance %s on"
7503                                  " node %s: %s" %
7504                                  (instance.name, source_node, msg))
7505
7506     self.feedback_fn("* deactivating the instance's disks on source node")
7507     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7508       raise errors.OpExecError("Can't shut down the instance's disks")
7509
7510     instance.primary_node = target_node
7511     # distribute new instance config to the other nodes
7512     self.cfg.Update(instance, self.feedback_fn)
7513
7514     # Only start the instance if it's marked as up
7515     if instance.admin_up:
7516       self.feedback_fn("* activating the instance's disks on target node %s" %
7517                        target_node)
7518       logging.info("Starting instance %s on node %s",
7519                    instance.name, target_node)
7520
7521       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7522                                            ignore_secondaries=True)
7523       if not disks_ok:
7524         _ShutdownInstanceDisks(self.lu, instance)
7525         raise errors.OpExecError("Can't activate the instance's disks")
7526
7527       self.feedback_fn("* starting the instance on the target node %s" %
7528                        target_node)
7529       result = self.rpc.call_instance_start(target_node, instance, None, None,
7530                                             False)
7531       msg = result.fail_msg
7532       if msg:
7533         _ShutdownInstanceDisks(self.lu, instance)
7534         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7535                                  (instance.name, target_node, msg))
7536
7537   def Exec(self, feedback_fn):
7538     """Perform the migration.
7539
7540     """
7541     self.feedback_fn = feedback_fn
7542     self.source_node = self.instance.primary_node
7543
7544     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7545     if self.instance.disk_template in constants.DTS_INT_MIRROR:
7546       self.target_node = self.instance.secondary_nodes[0]
7547       # Otherwise self.target_node has been populated either
7548       # directly, or through an iallocator.
7549
7550     self.all_nodes = [self.source_node, self.target_node]
7551     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7552                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
7553
7554     if self.failover:
7555       feedback_fn("Failover instance %s" % self.instance.name)
7556       self._ExecFailover()
7557     else:
7558       feedback_fn("Migrating instance %s" % self.instance.name)
7559
7560       if self.cleanup:
7561         return self._ExecCleanup()
7562       else:
7563         return self._ExecMigration()
7564
7565
7566 def _CreateBlockDev(lu, node, instance, device, force_create,
7567                     info, force_open):
7568   """Create a tree of block devices on a given node.
7569
7570   If this device type has to be created on secondaries, create it and
7571   all its children.
7572
7573   If not, just recurse to children keeping the same 'force' value.
7574
7575   @param lu: the lu on whose behalf we execute
7576   @param node: the node on which to create the device
7577   @type instance: L{objects.Instance}
7578   @param instance: the instance which owns the device
7579   @type device: L{objects.Disk}
7580   @param device: the device to create
7581   @type force_create: boolean
7582   @param force_create: whether to force creation of this device; this
7583       will be change to True whenever we find a device which has
7584       CreateOnSecondary() attribute
7585   @param info: the extra 'metadata' we should attach to the device
7586       (this will be represented as a LVM tag)
7587   @type force_open: boolean
7588   @param force_open: this parameter will be passes to the
7589       L{backend.BlockdevCreate} function where it specifies
7590       whether we run on primary or not, and it affects both
7591       the child assembly and the device own Open() execution
7592
7593   """
7594   if device.CreateOnSecondary():
7595     force_create = True
7596
7597   if device.children:
7598     for child in device.children:
7599       _CreateBlockDev(lu, node, instance, child, force_create,
7600                       info, force_open)
7601
7602   if not force_create:
7603     return
7604
7605   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7606
7607
7608 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7609   """Create a single block device on a given node.
7610
7611   This will not recurse over children of the device, so they must be
7612   created in advance.
7613
7614   @param lu: the lu on whose behalf we execute
7615   @param node: the node on which to create the device
7616   @type instance: L{objects.Instance}
7617   @param instance: the instance which owns the device
7618   @type device: L{objects.Disk}
7619   @param device: the device to create
7620   @param info: the extra 'metadata' we should attach to the device
7621       (this will be represented as a LVM tag)
7622   @type force_open: boolean
7623   @param force_open: this parameter will be passes to the
7624       L{backend.BlockdevCreate} function where it specifies
7625       whether we run on primary or not, and it affects both
7626       the child assembly and the device own Open() execution
7627
7628   """
7629   lu.cfg.SetDiskID(device, node)
7630   result = lu.rpc.call_blockdev_create(node, device, device.size,
7631                                        instance.name, force_open, info)
7632   result.Raise("Can't create block device %s on"
7633                " node %s for instance %s" % (device, node, instance.name))
7634   if device.physical_id is None:
7635     device.physical_id = result.payload
7636
7637
7638 def _GenerateUniqueNames(lu, exts):
7639   """Generate a suitable LV name.
7640
7641   This will generate a logical volume name for the given instance.
7642
7643   """
7644   results = []
7645   for val in exts:
7646     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7647     results.append("%s%s" % (new_id, val))
7648   return results
7649
7650
7651 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7652                          iv_name, p_minor, s_minor):
7653   """Generate a drbd8 device complete with its children.
7654
7655   """
7656   assert len(vgnames) == len(names) == 2
7657   port = lu.cfg.AllocatePort()
7658   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7659   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7660                           logical_id=(vgnames[0], names[0]))
7661   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7662                           logical_id=(vgnames[1], names[1]))
7663   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7664                           logical_id=(primary, secondary, port,
7665                                       p_minor, s_minor,
7666                                       shared_secret),
7667                           children=[dev_data, dev_meta],
7668                           iv_name=iv_name)
7669   return drbd_dev
7670
7671
7672 def _GenerateDiskTemplate(lu, template_name,
7673                           instance_name, primary_node,
7674                           secondary_nodes, disk_info,
7675                           file_storage_dir, file_driver,
7676                           base_index, feedback_fn):
7677   """Generate the entire disk layout for a given template type.
7678
7679   """
7680   #TODO: compute space requirements
7681
7682   vgname = lu.cfg.GetVGName()
7683   disk_count = len(disk_info)
7684   disks = []
7685   if template_name == constants.DT_DISKLESS:
7686     pass
7687   elif template_name == constants.DT_PLAIN:
7688     if len(secondary_nodes) != 0:
7689       raise errors.ProgrammerError("Wrong template configuration")
7690
7691     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7692                                       for i in range(disk_count)])
7693     for idx, disk in enumerate(disk_info):
7694       disk_index = idx + base_index
7695       vg = disk.get(constants.IDISK_VG, vgname)
7696       feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7697       disk_dev = objects.Disk(dev_type=constants.LD_LV,
7698                               size=disk[constants.IDISK_SIZE],
7699                               logical_id=(vg, names[idx]),
7700                               iv_name="disk/%d" % disk_index,
7701                               mode=disk[constants.IDISK_MODE])
7702       disks.append(disk_dev)
7703   elif template_name == constants.DT_DRBD8:
7704     if len(secondary_nodes) != 1:
7705       raise errors.ProgrammerError("Wrong template configuration")
7706     remote_node = secondary_nodes[0]
7707     minors = lu.cfg.AllocateDRBDMinor(
7708       [primary_node, remote_node] * len(disk_info), instance_name)
7709
7710     names = []
7711     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7712                                                for i in range(disk_count)]):
7713       names.append(lv_prefix + "_data")
7714       names.append(lv_prefix + "_meta")
7715     for idx, disk in enumerate(disk_info):
7716       disk_index = idx + base_index
7717       data_vg = disk.get(constants.IDISK_VG, vgname)
7718       meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7719       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7720                                       disk[constants.IDISK_SIZE],
7721                                       [data_vg, meta_vg],
7722                                       names[idx * 2:idx * 2 + 2],
7723                                       "disk/%d" % disk_index,
7724                                       minors[idx * 2], minors[idx * 2 + 1])
7725       disk_dev.mode = disk[constants.IDISK_MODE]
7726       disks.append(disk_dev)
7727   elif template_name == constants.DT_FILE:
7728     if len(secondary_nodes) != 0:
7729       raise errors.ProgrammerError("Wrong template configuration")
7730
7731     opcodes.RequireFileStorage()
7732
7733     for idx, disk in enumerate(disk_info):
7734       disk_index = idx + base_index
7735       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7736                               size=disk[constants.IDISK_SIZE],
7737                               iv_name="disk/%d" % disk_index,
7738                               logical_id=(file_driver,
7739                                           "%s/disk%d" % (file_storage_dir,
7740                                                          disk_index)),
7741                               mode=disk[constants.IDISK_MODE])
7742       disks.append(disk_dev)
7743   elif template_name == constants.DT_SHARED_FILE:
7744     if len(secondary_nodes) != 0:
7745       raise errors.ProgrammerError("Wrong template configuration")
7746
7747     opcodes.RequireSharedFileStorage()
7748
7749     for idx, disk in enumerate(disk_info):
7750       disk_index = idx + base_index
7751       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7752                               size=disk[constants.IDISK_SIZE],
7753                               iv_name="disk/%d" % disk_index,
7754                               logical_id=(file_driver,
7755                                           "%s/disk%d" % (file_storage_dir,
7756                                                          disk_index)),
7757                               mode=disk[constants.IDISK_MODE])
7758       disks.append(disk_dev)
7759   elif template_name == constants.DT_BLOCK:
7760     if len(secondary_nodes) != 0:
7761       raise errors.ProgrammerError("Wrong template configuration")
7762
7763     for idx, disk in enumerate(disk_info):
7764       disk_index = idx + base_index
7765       disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7766                               size=disk[constants.IDISK_SIZE],
7767                               logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7768                                           disk[constants.IDISK_ADOPT]),
7769                               iv_name="disk/%d" % disk_index,
7770                               mode=disk[constants.IDISK_MODE])
7771       disks.append(disk_dev)
7772
7773   else:
7774     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7775   return disks
7776
7777
7778 def _GetInstanceInfoText(instance):
7779   """Compute that text that should be added to the disk's metadata.
7780
7781   """
7782   return "originstname+%s" % instance.name
7783
7784
7785 def _CalcEta(time_taken, written, total_size):
7786   """Calculates the ETA based on size written and total size.
7787
7788   @param time_taken: The time taken so far
7789   @param written: amount written so far
7790   @param total_size: The total size of data to be written
7791   @return: The remaining time in seconds
7792
7793   """
7794   avg_time = time_taken / float(written)
7795   return (total_size - written) * avg_time
7796
7797
7798 def _WipeDisks(lu, instance):
7799   """Wipes instance disks.
7800
7801   @type lu: L{LogicalUnit}
7802   @param lu: the logical unit on whose behalf we execute
7803   @type instance: L{objects.Instance}
7804   @param instance: the instance whose disks we should create
7805   @return: the success of the wipe
7806
7807   """
7808   node = instance.primary_node
7809
7810   for device in instance.disks:
7811     lu.cfg.SetDiskID(device, node)
7812
7813   logging.info("Pause sync of instance %s disks", instance.name)
7814   result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7815
7816   for idx, success in enumerate(result.payload):
7817     if not success:
7818       logging.warn("pause-sync of instance %s for disks %d failed",
7819                    instance.name, idx)
7820
7821   try:
7822     for idx, device in enumerate(instance.disks):
7823       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7824       # MAX_WIPE_CHUNK at max
7825       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7826                             constants.MIN_WIPE_CHUNK_PERCENT)
7827       # we _must_ make this an int, otherwise rounding errors will
7828       # occur
7829       wipe_chunk_size = int(wipe_chunk_size)
7830
7831       lu.LogInfo("* Wiping disk %d", idx)
7832       logging.info("Wiping disk %d for instance %s, node %s using"
7833                    " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7834
7835       offset = 0
7836       size = device.size
7837       last_output = 0
7838       start_time = time.time()
7839
7840       while offset < size:
7841         wipe_size = min(wipe_chunk_size, size - offset)
7842         logging.debug("Wiping disk %d, offset %s, chunk %s",
7843                       idx, offset, wipe_size)
7844         result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7845         result.Raise("Could not wipe disk %d at offset %d for size %d" %
7846                      (idx, offset, wipe_size))
7847         now = time.time()
7848         offset += wipe_size
7849         if now - last_output >= 60:
7850           eta = _CalcEta(now - start_time, offset, size)
7851           lu.LogInfo(" - done: %.1f%% ETA: %s" %
7852                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
7853           last_output = now
7854   finally:
7855     logging.info("Resume sync of instance %s disks", instance.name)
7856
7857     result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7858
7859     for idx, success in enumerate(result.payload):
7860       if not success:
7861         lu.LogWarning("Resume sync of disk %d failed, please have a"
7862                       " look at the status and troubleshoot the issue", idx)
7863         logging.warn("resume-sync of instance %s for disks %d failed",
7864                      instance.name, idx)
7865
7866
7867 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7868   """Create all disks for an instance.
7869
7870   This abstracts away some work from AddInstance.
7871
7872   @type lu: L{LogicalUnit}
7873   @param lu: the logical unit on whose behalf we execute
7874   @type instance: L{objects.Instance}
7875   @param instance: the instance whose disks we should create
7876   @type to_skip: list
7877   @param to_skip: list of indices to skip
7878   @type target_node: string
7879   @param target_node: if passed, overrides the target node for creation
7880   @rtype: boolean
7881   @return: the success of the creation
7882
7883   """
7884   info = _GetInstanceInfoText(instance)
7885   if target_node is None:
7886     pnode = instance.primary_node
7887     all_nodes = instance.all_nodes
7888   else:
7889     pnode = target_node
7890     all_nodes = [pnode]
7891
7892   if instance.disk_template in constants.DTS_FILEBASED:
7893     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7894     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7895
7896     result.Raise("Failed to create directory '%s' on"
7897                  " node %s" % (file_storage_dir, pnode))
7898
7899   # Note: this needs to be kept in sync with adding of disks in
7900   # LUInstanceSetParams
7901   for idx, device in enumerate(instance.disks):
7902     if to_skip and idx in to_skip:
7903       continue
7904     logging.info("Creating volume %s for instance %s",
7905                  device.iv_name, instance.name)
7906     #HARDCODE
7907     for node in all_nodes:
7908       f_create = node == pnode
7909       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7910
7911
7912 def _RemoveDisks(lu, instance, target_node=None):
7913   """Remove all disks for an instance.
7914
7915   This abstracts away some work from `AddInstance()` and
7916   `RemoveInstance()`. Note that in case some of the devices couldn't
7917   be removed, the removal will continue with the other ones (compare
7918   with `_CreateDisks()`).
7919
7920   @type lu: L{LogicalUnit}
7921   @param lu: the logical unit on whose behalf we execute
7922   @type instance: L{objects.Instance}
7923   @param instance: the instance whose disks we should remove
7924   @type target_node: string
7925   @param target_node: used to override the node on which to remove the disks
7926   @rtype: boolean
7927   @return: the success of the removal
7928
7929   """
7930   logging.info("Removing block devices for instance %s", instance.name)
7931
7932   all_result = True
7933   for device in instance.disks:
7934     if target_node:
7935       edata = [(target_node, device)]
7936     else:
7937       edata = device.ComputeNodeTree(instance.primary_node)
7938     for node, disk in edata:
7939       lu.cfg.SetDiskID(disk, node)
7940       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7941       if msg:
7942         lu.LogWarning("Could not remove block device %s on node %s,"
7943                       " continuing anyway: %s", device.iv_name, node, msg)
7944         all_result = False
7945
7946   if instance.disk_template == constants.DT_FILE:
7947     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7948     if target_node:
7949       tgt = target_node
7950     else:
7951       tgt = instance.primary_node
7952     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7953     if result.fail_msg:
7954       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7955                     file_storage_dir, instance.primary_node, result.fail_msg)
7956       all_result = False
7957
7958   return all_result
7959
7960
7961 def _ComputeDiskSizePerVG(disk_template, disks):
7962   """Compute disk size requirements in the volume group
7963
7964   """
7965   def _compute(disks, payload):
7966     """Universal algorithm.
7967
7968     """
7969     vgs = {}
7970     for disk in disks:
7971       vgs[disk[constants.IDISK_VG]] = \
7972         vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
7973
7974     return vgs
7975
7976   # Required free disk space as a function of disk and swap space
7977   req_size_dict = {
7978     constants.DT_DISKLESS: {},
7979     constants.DT_PLAIN: _compute(disks, 0),
7980     # 128 MB are added for drbd metadata for each disk
7981     constants.DT_DRBD8: _compute(disks, 128),
7982     constants.DT_FILE: {},
7983     constants.DT_SHARED_FILE: {},
7984   }
7985
7986   if disk_template not in req_size_dict:
7987     raise errors.ProgrammerError("Disk template '%s' size requirement"
7988                                  " is unknown" % disk_template)
7989
7990   return req_size_dict[disk_template]
7991
7992
7993 def _ComputeDiskSize(disk_template, disks):
7994   """Compute disk size requirements in the volume group
7995
7996   """
7997   # Required free disk space as a function of disk and swap space
7998   req_size_dict = {
7999     constants.DT_DISKLESS: None,
8000     constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8001     # 128 MB are added for drbd metadata for each disk
8002     constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
8003     constants.DT_FILE: None,
8004     constants.DT_SHARED_FILE: 0,
8005     constants.DT_BLOCK: 0,
8006   }
8007
8008   if disk_template not in req_size_dict:
8009     raise errors.ProgrammerError("Disk template '%s' size requirement"
8010                                  " is unknown" % disk_template)
8011
8012   return req_size_dict[disk_template]
8013
8014
8015 def _FilterVmNodes(lu, nodenames):
8016   """Filters out non-vm_capable nodes from a list.
8017
8018   @type lu: L{LogicalUnit}
8019   @param lu: the logical unit for which we check
8020   @type nodenames: list
8021   @param nodenames: the list of nodes on which we should check
8022   @rtype: list
8023   @return: the list of vm-capable nodes
8024
8025   """
8026   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8027   return [name for name in nodenames if name not in vm_nodes]
8028
8029
8030 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8031   """Hypervisor parameter validation.
8032
8033   This function abstract the hypervisor parameter validation to be
8034   used in both instance create and instance modify.
8035
8036   @type lu: L{LogicalUnit}
8037   @param lu: the logical unit for which we check
8038   @type nodenames: list
8039   @param nodenames: the list of nodes on which we should check
8040   @type hvname: string
8041   @param hvname: the name of the hypervisor we should use
8042   @type hvparams: dict
8043   @param hvparams: the parameters which we need to check
8044   @raise errors.OpPrereqError: if the parameters are not valid
8045
8046   """
8047   nodenames = _FilterVmNodes(lu, nodenames)
8048   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8049                                                   hvname,
8050                                                   hvparams)
8051   for node in nodenames:
8052     info = hvinfo[node]
8053     if info.offline:
8054       continue
8055     info.Raise("Hypervisor parameter validation failed on node %s" % node)
8056
8057
8058 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8059   """OS parameters validation.
8060
8061   @type lu: L{LogicalUnit}
8062   @param lu: the logical unit for which we check
8063   @type required: boolean
8064   @param required: whether the validation should fail if the OS is not
8065       found
8066   @type nodenames: list
8067   @param nodenames: the list of nodes on which we should check
8068   @type osname: string
8069   @param osname: the name of the hypervisor we should use
8070   @type osparams: dict
8071   @param osparams: the parameters which we need to check
8072   @raise errors.OpPrereqError: if the parameters are not valid
8073
8074   """
8075   nodenames = _FilterVmNodes(lu, nodenames)
8076   result = lu.rpc.call_os_validate(required, nodenames, osname,
8077                                    [constants.OS_VALIDATE_PARAMETERS],
8078                                    osparams)
8079   for node, nres in result.items():
8080     # we don't check for offline cases since this should be run only
8081     # against the master node and/or an instance's nodes
8082     nres.Raise("OS Parameters validation failed on node %s" % node)
8083     if not nres.payload:
8084       lu.LogInfo("OS %s not found on node %s, validation skipped",
8085                  osname, node)
8086
8087
8088 class LUInstanceCreate(LogicalUnit):
8089   """Create an instance.
8090
8091   """
8092   HPATH = "instance-add"
8093   HTYPE = constants.HTYPE_INSTANCE
8094   REQ_BGL = False
8095
8096   def CheckArguments(self):
8097     """Check arguments.
8098
8099     """
8100     # do not require name_check to ease forward/backward compatibility
8101     # for tools
8102     if self.op.no_install and self.op.start:
8103       self.LogInfo("No-installation mode selected, disabling startup")
8104       self.op.start = False
8105     # validate/normalize the instance name
8106     self.op.instance_name = \
8107       netutils.Hostname.GetNormalizedName(self.op.instance_name)
8108
8109     if self.op.ip_check and not self.op.name_check:
8110       # TODO: make the ip check more flexible and not depend on the name check
8111       raise errors.OpPrereqError("Cannot do IP address check without a name"
8112                                  " check", errors.ECODE_INVAL)
8113
8114     # check nics' parameter names
8115     for nic in self.op.nics:
8116       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8117
8118     # check disks. parameter names and consistent adopt/no-adopt strategy
8119     has_adopt = has_no_adopt = False
8120     for disk in self.op.disks:
8121       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8122       if constants.IDISK_ADOPT in disk:
8123         has_adopt = True
8124       else:
8125         has_no_adopt = True
8126     if has_adopt and has_no_adopt:
8127       raise errors.OpPrereqError("Either all disks are adopted or none is",
8128                                  errors.ECODE_INVAL)
8129     if has_adopt:
8130       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8131         raise errors.OpPrereqError("Disk adoption is not supported for the"
8132                                    " '%s' disk template" %
8133                                    self.op.disk_template,
8134                                    errors.ECODE_INVAL)
8135       if self.op.iallocator is not None:
8136         raise errors.OpPrereqError("Disk adoption not allowed with an"
8137                                    " iallocator script", errors.ECODE_INVAL)
8138       if self.op.mode == constants.INSTANCE_IMPORT:
8139         raise errors.OpPrereqError("Disk adoption not allowed for"
8140                                    " instance import", errors.ECODE_INVAL)
8141     else:
8142       if self.op.disk_template in constants.DTS_MUST_ADOPT:
8143         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8144                                    " but no 'adopt' parameter given" %
8145                                    self.op.disk_template,
8146                                    errors.ECODE_INVAL)
8147
8148     self.adopt_disks = has_adopt
8149
8150     # instance name verification
8151     if self.op.name_check:
8152       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8153       self.op.instance_name = self.hostname1.name
8154       # used in CheckPrereq for ip ping check
8155       self.check_ip = self.hostname1.ip
8156     else:
8157       self.check_ip = None
8158
8159     # file storage checks
8160     if (self.op.file_driver and
8161         not self.op.file_driver in constants.FILE_DRIVER):
8162       raise errors.OpPrereqError("Invalid file driver name '%s'" %
8163                                  self.op.file_driver, errors.ECODE_INVAL)
8164
8165     if self.op.disk_template == constants.DT_FILE:
8166       opcodes.RequireFileStorage()
8167     elif self.op.disk_template == constants.DT_SHARED_FILE:
8168       opcodes.RequireSharedFileStorage()
8169
8170     ### Node/iallocator related checks
8171     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8172
8173     if self.op.pnode is not None:
8174       if self.op.disk_template in constants.DTS_INT_MIRROR:
8175         if self.op.snode is None:
8176           raise errors.OpPrereqError("The networked disk templates need"
8177                                      " a mirror node", errors.ECODE_INVAL)
8178       elif self.op.snode:
8179         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8180                         " template")
8181         self.op.snode = None
8182
8183     self._cds = _GetClusterDomainSecret()
8184
8185     if self.op.mode == constants.INSTANCE_IMPORT:
8186       # On import force_variant must be True, because if we forced it at
8187       # initial install, our only chance when importing it back is that it
8188       # works again!
8189       self.op.force_variant = True
8190
8191       if self.op.no_install:
8192         self.LogInfo("No-installation mode has no effect during import")
8193
8194     elif self.op.mode == constants.INSTANCE_CREATE:
8195       if self.op.os_type is None:
8196         raise errors.OpPrereqError("No guest OS specified",
8197                                    errors.ECODE_INVAL)
8198       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8199         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8200                                    " installation" % self.op.os_type,
8201                                    errors.ECODE_STATE)
8202       if self.op.disk_template is None:
8203         raise errors.OpPrereqError("No disk template specified",
8204                                    errors.ECODE_INVAL)
8205
8206     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8207       # Check handshake to ensure both clusters have the same domain secret
8208       src_handshake = self.op.source_handshake
8209       if not src_handshake:
8210         raise errors.OpPrereqError("Missing source handshake",
8211                                    errors.ECODE_INVAL)
8212
8213       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8214                                                            src_handshake)
8215       if errmsg:
8216         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8217                                    errors.ECODE_INVAL)
8218
8219       # Load and check source CA
8220       self.source_x509_ca_pem = self.op.source_x509_ca
8221       if not self.source_x509_ca_pem:
8222         raise errors.OpPrereqError("Missing source X509 CA",
8223                                    errors.ECODE_INVAL)
8224
8225       try:
8226         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8227                                                     self._cds)
8228       except OpenSSL.crypto.Error, err:
8229         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8230                                    (err, ), errors.ECODE_INVAL)
8231
8232       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8233       if errcode is not None:
8234         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8235                                    errors.ECODE_INVAL)
8236
8237       self.source_x509_ca = cert
8238
8239       src_instance_name = self.op.source_instance_name
8240       if not src_instance_name:
8241         raise errors.OpPrereqError("Missing source instance name",
8242                                    errors.ECODE_INVAL)
8243
8244       self.source_instance_name = \
8245           netutils.GetHostname(name=src_instance_name).name
8246
8247     else:
8248       raise errors.OpPrereqError("Invalid instance creation mode %r" %
8249                                  self.op.mode, errors.ECODE_INVAL)
8250
8251   def ExpandNames(self):
8252     """ExpandNames for CreateInstance.
8253
8254     Figure out the right locks for instance creation.
8255
8256     """
8257     self.needed_locks = {}
8258
8259     instance_name = self.op.instance_name
8260     # this is just a preventive check, but someone might still add this
8261     # instance in the meantime, and creation will fail at lock-add time
8262     if instance_name in self.cfg.GetInstanceList():
8263       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8264                                  instance_name, errors.ECODE_EXISTS)
8265
8266     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8267
8268     if self.op.iallocator:
8269       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8270     else:
8271       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8272       nodelist = [self.op.pnode]
8273       if self.op.snode is not None:
8274         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8275         nodelist.append(self.op.snode)
8276       self.needed_locks[locking.LEVEL_NODE] = nodelist
8277
8278     # in case of import lock the source node too
8279     if self.op.mode == constants.INSTANCE_IMPORT:
8280       src_node = self.op.src_node
8281       src_path = self.op.src_path
8282
8283       if src_path is None:
8284         self.op.src_path = src_path = self.op.instance_name
8285
8286       if src_node is None:
8287         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8288         self.op.src_node = None
8289         if os.path.isabs(src_path):
8290           raise errors.OpPrereqError("Importing an instance from a path"
8291                                      " requires a source node option",
8292                                      errors.ECODE_INVAL)
8293       else:
8294         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8295         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8296           self.needed_locks[locking.LEVEL_NODE].append(src_node)
8297         if not os.path.isabs(src_path):
8298           self.op.src_path = src_path = \
8299             utils.PathJoin(constants.EXPORT_DIR, src_path)
8300
8301   def _RunAllocator(self):
8302     """Run the allocator based on input opcode.
8303
8304     """
8305     nics = [n.ToDict() for n in self.nics]
8306     ial = IAllocator(self.cfg, self.rpc,
8307                      mode=constants.IALLOCATOR_MODE_ALLOC,
8308                      name=self.op.instance_name,
8309                      disk_template=self.op.disk_template,
8310                      tags=self.op.tags,
8311                      os=self.op.os_type,
8312                      vcpus=self.be_full[constants.BE_VCPUS],
8313                      memory=self.be_full[constants.BE_MEMORY],
8314                      disks=self.disks,
8315                      nics=nics,
8316                      hypervisor=self.op.hypervisor,
8317                      )
8318
8319     ial.Run(self.op.iallocator)
8320
8321     if not ial.success:
8322       raise errors.OpPrereqError("Can't compute nodes using"
8323                                  " iallocator '%s': %s" %
8324                                  (self.op.iallocator, ial.info),
8325                                  errors.ECODE_NORES)
8326     if len(ial.result) != ial.required_nodes:
8327       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8328                                  " of nodes (%s), required %s" %
8329                                  (self.op.iallocator, len(ial.result),
8330                                   ial.required_nodes), errors.ECODE_FAULT)
8331     self.op.pnode = ial.result[0]
8332     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8333                  self.op.instance_name, self.op.iallocator,
8334                  utils.CommaJoin(ial.result))
8335     if ial.required_nodes == 2:
8336       self.op.snode = ial.result[1]
8337
8338   def BuildHooksEnv(self):
8339     """Build hooks env.
8340
8341     This runs on master, primary and secondary nodes of the instance.
8342
8343     """
8344     env = {
8345       "ADD_MODE": self.op.mode,
8346       }
8347     if self.op.mode == constants.INSTANCE_IMPORT:
8348       env["SRC_NODE"] = self.op.src_node
8349       env["SRC_PATH"] = self.op.src_path
8350       env["SRC_IMAGES"] = self.src_images
8351
8352     env.update(_BuildInstanceHookEnv(
8353       name=self.op.instance_name,
8354       primary_node=self.op.pnode,
8355       secondary_nodes=self.secondaries,
8356       status=self.op.start,
8357       os_type=self.op.os_type,
8358       memory=self.be_full[constants.BE_MEMORY],
8359       vcpus=self.be_full[constants.BE_VCPUS],
8360       nics=_NICListToTuple(self, self.nics),
8361       disk_template=self.op.disk_template,
8362       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8363              for d in self.disks],
8364       bep=self.be_full,
8365       hvp=self.hv_full,
8366       hypervisor_name=self.op.hypervisor,
8367       tags=self.op.tags,
8368     ))
8369
8370     return env
8371
8372   def BuildHooksNodes(self):
8373     """Build hooks nodes.
8374
8375     """
8376     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8377     return nl, nl
8378
8379   def _ReadExportInfo(self):
8380     """Reads the export information from disk.
8381
8382     It will override the opcode source node and path with the actual
8383     information, if these two were not specified before.
8384
8385     @return: the export information
8386
8387     """
8388     assert self.op.mode == constants.INSTANCE_IMPORT
8389
8390     src_node = self.op.src_node
8391     src_path = self.op.src_path
8392
8393     if src_node is None:
8394       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8395       exp_list = self.rpc.call_export_list(locked_nodes)
8396       found = False
8397       for node in exp_list:
8398         if exp_list[node].fail_msg:
8399           continue
8400         if src_path in exp_list[node].payload:
8401           found = True
8402           self.op.src_node = src_node = node
8403           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8404                                                        src_path)
8405           break
8406       if not found:
8407         raise errors.OpPrereqError("No export found for relative path %s" %
8408                                     src_path, errors.ECODE_INVAL)
8409
8410     _CheckNodeOnline(self, src_node)
8411     result = self.rpc.call_export_info(src_node, src_path)
8412     result.Raise("No export or invalid export found in dir %s" % src_path)
8413
8414     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8415     if not export_info.has_section(constants.INISECT_EXP):
8416       raise errors.ProgrammerError("Corrupted export config",
8417                                    errors.ECODE_ENVIRON)
8418
8419     ei_version = export_info.get(constants.INISECT_EXP, "version")
8420     if (int(ei_version) != constants.EXPORT_VERSION):
8421       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8422                                  (ei_version, constants.EXPORT_VERSION),
8423                                  errors.ECODE_ENVIRON)
8424     return export_info
8425
8426   def _ReadExportParams(self, einfo):
8427     """Use export parameters as defaults.
8428
8429     In case the opcode doesn't specify (as in override) some instance
8430     parameters, then try to use them from the export information, if
8431     that declares them.
8432
8433     """
8434     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8435
8436     if self.op.disk_template is None:
8437       if einfo.has_option(constants.INISECT_INS, "disk_template"):
8438         self.op.disk_template = einfo.get(constants.INISECT_INS,
8439                                           "disk_template")
8440         if self.op.disk_template not in constants.DISK_TEMPLATES:
8441           raise errors.OpPrereqError("Disk template specified in configuration"
8442                                      " file is not one of the allowed values:"
8443                                      " %s" % " ".join(constants.DISK_TEMPLATES))
8444       else:
8445         raise errors.OpPrereqError("No disk template specified and the export"
8446                                    " is missing the disk_template information",
8447                                    errors.ECODE_INVAL)
8448
8449     if not self.op.disks:
8450       disks = []
8451       # TODO: import the disk iv_name too
8452       for idx in range(constants.MAX_DISKS):
8453         if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8454           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8455           disks.append({constants.IDISK_SIZE: disk_sz})
8456       self.op.disks = disks
8457       if not disks and self.op.disk_template != constants.DT_DISKLESS:
8458         raise errors.OpPrereqError("No disk info specified and the export"
8459                                    " is missing the disk information",
8460                                    errors.ECODE_INVAL)
8461
8462     if not self.op.nics:
8463       nics = []
8464       for idx in range(constants.MAX_NICS):
8465         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8466           ndict = {}
8467           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8468             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8469             ndict[name] = v
8470           nics.append(ndict)
8471         else:
8472           break
8473       self.op.nics = nics
8474
8475     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8476       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8477
8478     if (self.op.hypervisor is None and
8479         einfo.has_option(constants.INISECT_INS, "hypervisor")):
8480       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8481
8482     if einfo.has_section(constants.INISECT_HYP):
8483       # use the export parameters but do not override the ones
8484       # specified by the user
8485       for name, value in einfo.items(constants.INISECT_HYP):
8486         if name not in self.op.hvparams:
8487           self.op.hvparams[name] = value
8488
8489     if einfo.has_section(constants.INISECT_BEP):
8490       # use the parameters, without overriding
8491       for name, value in einfo.items(constants.INISECT_BEP):
8492         if name not in self.op.beparams:
8493           self.op.beparams[name] = value
8494     else:
8495       # try to read the parameters old style, from the main section
8496       for name in constants.BES_PARAMETERS:
8497         if (name not in self.op.beparams and
8498             einfo.has_option(constants.INISECT_INS, name)):
8499           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8500
8501     if einfo.has_section(constants.INISECT_OSP):
8502       # use the parameters, without overriding
8503       for name, value in einfo.items(constants.INISECT_OSP):
8504         if name not in self.op.osparams:
8505           self.op.osparams[name] = value
8506
8507   def _RevertToDefaults(self, cluster):
8508     """Revert the instance parameters to the default values.
8509
8510     """
8511     # hvparams
8512     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8513     for name in self.op.hvparams.keys():
8514       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8515         del self.op.hvparams[name]
8516     # beparams
8517     be_defs = cluster.SimpleFillBE({})
8518     for name in self.op.beparams.keys():
8519       if name in be_defs and be_defs[name] == self.op.beparams[name]:
8520         del self.op.beparams[name]
8521     # nic params
8522     nic_defs = cluster.SimpleFillNIC({})
8523     for nic in self.op.nics:
8524       for name in constants.NICS_PARAMETERS:
8525         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8526           del nic[name]
8527     # osparams
8528     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8529     for name in self.op.osparams.keys():
8530       if name in os_defs and os_defs[name] == self.op.osparams[name]:
8531         del self.op.osparams[name]
8532
8533   def _CalculateFileStorageDir(self):
8534     """Calculate final instance file storage dir.
8535
8536     """
8537     # file storage dir calculation/check
8538     self.instance_file_storage_dir = None
8539     if self.op.disk_template in constants.DTS_FILEBASED:
8540       # build the full file storage dir path
8541       joinargs = []
8542
8543       if self.op.disk_template == constants.DT_SHARED_FILE:
8544         get_fsd_fn = self.cfg.GetSharedFileStorageDir
8545       else:
8546         get_fsd_fn = self.cfg.GetFileStorageDir
8547
8548       cfg_storagedir = get_fsd_fn()
8549       if not cfg_storagedir:
8550         raise errors.OpPrereqError("Cluster file storage dir not defined")
8551       joinargs.append(cfg_storagedir)
8552
8553       if self.op.file_storage_dir is not None:
8554         joinargs.append(self.op.file_storage_dir)
8555
8556       joinargs.append(self.op.instance_name)
8557
8558       # pylint: disable=W0142
8559       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8560
8561   def CheckPrereq(self):
8562     """Check prerequisites.
8563
8564     """
8565     self._CalculateFileStorageDir()
8566
8567     if self.op.mode == constants.INSTANCE_IMPORT:
8568       export_info = self._ReadExportInfo()
8569       self._ReadExportParams(export_info)
8570
8571     if (not self.cfg.GetVGName() and
8572         self.op.disk_template not in constants.DTS_NOT_LVM):
8573       raise errors.OpPrereqError("Cluster does not support lvm-based"
8574                                  " instances", errors.ECODE_STATE)
8575
8576     if (self.op.hypervisor is None or
8577         self.op.hypervisor == constants.VALUE_AUTO):
8578       self.op.hypervisor = self.cfg.GetHypervisorType()
8579
8580     cluster = self.cfg.GetClusterInfo()
8581     enabled_hvs = cluster.enabled_hypervisors
8582     if self.op.hypervisor not in enabled_hvs:
8583       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8584                                  " cluster (%s)" % (self.op.hypervisor,
8585                                   ",".join(enabled_hvs)),
8586                                  errors.ECODE_STATE)
8587
8588     # Check tag validity
8589     for tag in self.op.tags:
8590       objects.TaggableObject.ValidateTag(tag)
8591
8592     # check hypervisor parameter syntax (locally)
8593     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8594     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8595                                       self.op.hvparams)
8596     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8597     hv_type.CheckParameterSyntax(filled_hvp)
8598     self.hv_full = filled_hvp
8599     # check that we don't specify global parameters on an instance
8600     _CheckGlobalHvParams(self.op.hvparams)
8601
8602     # fill and remember the beparams dict
8603     default_beparams = cluster.beparams[constants.PP_DEFAULT]
8604     for param, value in self.op.beparams.iteritems():
8605       if value == constants.VALUE_AUTO:
8606         self.op.beparams[param] = default_beparams[param]
8607     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8608     self.be_full = cluster.SimpleFillBE(self.op.beparams)
8609
8610     # build os parameters
8611     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8612
8613     # now that hvp/bep are in final format, let's reset to defaults,
8614     # if told to do so
8615     if self.op.identify_defaults:
8616       self._RevertToDefaults(cluster)
8617
8618     # NIC buildup
8619     self.nics = []
8620     for idx, nic in enumerate(self.op.nics):
8621       nic_mode_req = nic.get(constants.INIC_MODE, None)
8622       nic_mode = nic_mode_req
8623       if nic_mode is None or nic_mode == constants.VALUE_AUTO:
8624         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8625
8626       # in routed mode, for the first nic, the default ip is 'auto'
8627       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8628         default_ip_mode = constants.VALUE_AUTO
8629       else:
8630         default_ip_mode = constants.VALUE_NONE
8631
8632       # ip validity checks
8633       ip = nic.get(constants.INIC_IP, default_ip_mode)
8634       if ip is None or ip.lower() == constants.VALUE_NONE:
8635         nic_ip = None
8636       elif ip.lower() == constants.VALUE_AUTO:
8637         if not self.op.name_check:
8638           raise errors.OpPrereqError("IP address set to auto but name checks"
8639                                      " have been skipped",
8640                                      errors.ECODE_INVAL)
8641         nic_ip = self.hostname1.ip
8642       else:
8643         if not netutils.IPAddress.IsValid(ip):
8644           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8645                                      errors.ECODE_INVAL)
8646         nic_ip = ip
8647
8648       # TODO: check the ip address for uniqueness
8649       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8650         raise errors.OpPrereqError("Routed nic mode requires an ip address",
8651                                    errors.ECODE_INVAL)
8652
8653       # MAC address verification
8654       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8655       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8656         mac = utils.NormalizeAndValidateMac(mac)
8657
8658         try:
8659           self.cfg.ReserveMAC(mac, self.proc.GetECId())
8660         except errors.ReservationError:
8661           raise errors.OpPrereqError("MAC address %s already in use"
8662                                      " in cluster" % mac,
8663                                      errors.ECODE_NOTUNIQUE)
8664
8665       #  Build nic parameters
8666       link = nic.get(constants.INIC_LINK, None)
8667       if link == constants.VALUE_AUTO:
8668         link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
8669       nicparams = {}
8670       if nic_mode_req:
8671         nicparams[constants.NIC_MODE] = nic_mode
8672       if link:
8673         nicparams[constants.NIC_LINK] = link
8674
8675       check_params = cluster.SimpleFillNIC(nicparams)
8676       objects.NIC.CheckParameterSyntax(check_params)
8677       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8678
8679     # disk checks/pre-build
8680     default_vg = self.cfg.GetVGName()
8681     self.disks = []
8682     for disk in self.op.disks:
8683       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8684       if mode not in constants.DISK_ACCESS_SET:
8685         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8686                                    mode, errors.ECODE_INVAL)
8687       size = disk.get(constants.IDISK_SIZE, None)
8688       if size is None:
8689         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8690       try:
8691         size = int(size)
8692       except (TypeError, ValueError):
8693         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8694                                    errors.ECODE_INVAL)
8695
8696       data_vg = disk.get(constants.IDISK_VG, default_vg)
8697       new_disk = {
8698         constants.IDISK_SIZE: size,
8699         constants.IDISK_MODE: mode,
8700         constants.IDISK_VG: data_vg,
8701         constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8702         }
8703       if constants.IDISK_ADOPT in disk:
8704         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8705       self.disks.append(new_disk)
8706
8707     if self.op.mode == constants.INSTANCE_IMPORT:
8708       disk_images = []
8709       for idx in range(len(self.disks)):
8710         option = "disk%d_dump" % idx
8711         if export_info.has_option(constants.INISECT_INS, option):
8712           # FIXME: are the old os-es, disk sizes, etc. useful?
8713           export_name = export_info.get(constants.INISECT_INS, option)
8714           image = utils.PathJoin(self.op.src_path, export_name)
8715           disk_images.append(image)
8716         else:
8717           disk_images.append(False)
8718
8719       self.src_images = disk_images
8720
8721       old_name = export_info.get(constants.INISECT_INS, "name")
8722       if self.op.instance_name == old_name:
8723         for idx, nic in enumerate(self.nics):
8724           if nic.mac == constants.VALUE_AUTO:
8725             nic_mac_ini = "nic%d_mac" % idx
8726             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8727
8728     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8729
8730     # ip ping checks (we use the same ip that was resolved in ExpandNames)
8731     if self.op.ip_check:
8732       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8733         raise errors.OpPrereqError("IP %s of instance %s already in use" %
8734                                    (self.check_ip, self.op.instance_name),
8735                                    errors.ECODE_NOTUNIQUE)
8736
8737     #### mac address generation
8738     # By generating here the mac address both the allocator and the hooks get
8739     # the real final mac address rather than the 'auto' or 'generate' value.
8740     # There is a race condition between the generation and the instance object
8741     # creation, which means that we know the mac is valid now, but we're not
8742     # sure it will be when we actually add the instance. If things go bad
8743     # adding the instance will abort because of a duplicate mac, and the
8744     # creation job will fail.
8745     for nic in self.nics:
8746       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8747         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8748
8749     #### allocator run
8750
8751     if self.op.iallocator is not None:
8752       self._RunAllocator()
8753
8754     #### node related checks
8755
8756     # check primary node
8757     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8758     assert self.pnode is not None, \
8759       "Cannot retrieve locked node %s" % self.op.pnode
8760     if pnode.offline:
8761       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8762                                  pnode.name, errors.ECODE_STATE)
8763     if pnode.drained:
8764       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8765                                  pnode.name, errors.ECODE_STATE)
8766     if not pnode.vm_capable:
8767       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8768                                  " '%s'" % pnode.name, errors.ECODE_STATE)
8769
8770     self.secondaries = []
8771
8772     # mirror node verification
8773     if self.op.disk_template in constants.DTS_INT_MIRROR:
8774       if self.op.snode == pnode.name:
8775         raise errors.OpPrereqError("The secondary node cannot be the"
8776                                    " primary node", errors.ECODE_INVAL)
8777       _CheckNodeOnline(self, self.op.snode)
8778       _CheckNodeNotDrained(self, self.op.snode)
8779       _CheckNodeVmCapable(self, self.op.snode)
8780       self.secondaries.append(self.op.snode)
8781
8782     nodenames = [pnode.name] + self.secondaries
8783
8784     if not self.adopt_disks:
8785       # Check lv size requirements, if not adopting
8786       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8787       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8788
8789     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8790       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8791                                 disk[constants.IDISK_ADOPT])
8792                      for disk in self.disks])
8793       if len(all_lvs) != len(self.disks):
8794         raise errors.OpPrereqError("Duplicate volume names given for adoption",
8795                                    errors.ECODE_INVAL)
8796       for lv_name in all_lvs:
8797         try:
8798           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8799           # to ReserveLV uses the same syntax
8800           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8801         except errors.ReservationError:
8802           raise errors.OpPrereqError("LV named %s used by another instance" %
8803                                      lv_name, errors.ECODE_NOTUNIQUE)
8804
8805       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8806       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8807
8808       node_lvs = self.rpc.call_lv_list([pnode.name],
8809                                        vg_names.payload.keys())[pnode.name]
8810       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8811       node_lvs = node_lvs.payload
8812
8813       delta = all_lvs.difference(node_lvs.keys())
8814       if delta:
8815         raise errors.OpPrereqError("Missing logical volume(s): %s" %
8816                                    utils.CommaJoin(delta),
8817                                    errors.ECODE_INVAL)
8818       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8819       if online_lvs:
8820         raise errors.OpPrereqError("Online logical volumes found, cannot"
8821                                    " adopt: %s" % utils.CommaJoin(online_lvs),
8822                                    errors.ECODE_STATE)
8823       # update the size of disk based on what is found
8824       for dsk in self.disks:
8825         dsk[constants.IDISK_SIZE] = \
8826           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8827                                         dsk[constants.IDISK_ADOPT])][0]))
8828
8829     elif self.op.disk_template == constants.DT_BLOCK:
8830       # Normalize and de-duplicate device paths
8831       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8832                        for disk in self.disks])
8833       if len(all_disks) != len(self.disks):
8834         raise errors.OpPrereqError("Duplicate disk names given for adoption",
8835                                    errors.ECODE_INVAL)
8836       baddisks = [d for d in all_disks
8837                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8838       if baddisks:
8839         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8840                                    " cannot be adopted" %
8841                                    (", ".join(baddisks),
8842                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
8843                                    errors.ECODE_INVAL)
8844
8845       node_disks = self.rpc.call_bdev_sizes([pnode.name],
8846                                             list(all_disks))[pnode.name]
8847       node_disks.Raise("Cannot get block device information from node %s" %
8848                        pnode.name)
8849       node_disks = node_disks.payload
8850       delta = all_disks.difference(node_disks.keys())
8851       if delta:
8852         raise errors.OpPrereqError("Missing block device(s): %s" %
8853                                    utils.CommaJoin(delta),
8854                                    errors.ECODE_INVAL)
8855       for dsk in self.disks:
8856         dsk[constants.IDISK_SIZE] = \
8857           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8858
8859     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8860
8861     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8862     # check OS parameters (remotely)
8863     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8864
8865     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8866
8867     # memory check on primary node
8868     if self.op.start:
8869       _CheckNodeFreeMemory(self, self.pnode.name,
8870                            "creating instance %s" % self.op.instance_name,
8871                            self.be_full[constants.BE_MEMORY],
8872                            self.op.hypervisor)
8873
8874     self.dry_run_result = list(nodenames)
8875
8876   def Exec(self, feedback_fn):
8877     """Create and add the instance to the cluster.
8878
8879     """
8880     instance = self.op.instance_name
8881     pnode_name = self.pnode.name
8882
8883     ht_kind = self.op.hypervisor
8884     if ht_kind in constants.HTS_REQ_PORT:
8885       network_port = self.cfg.AllocatePort()
8886     else:
8887       network_port = None
8888
8889     disks = _GenerateDiskTemplate(self,
8890                                   self.op.disk_template,
8891                                   instance, pnode_name,
8892                                   self.secondaries,
8893                                   self.disks,
8894                                   self.instance_file_storage_dir,
8895                                   self.op.file_driver,
8896                                   0,
8897                                   feedback_fn)
8898
8899     iobj = objects.Instance(name=instance, os=self.op.os_type,
8900                             primary_node=pnode_name,
8901                             nics=self.nics, disks=disks,
8902                             disk_template=self.op.disk_template,
8903                             admin_up=False,
8904                             network_port=network_port,
8905                             beparams=self.op.beparams,
8906                             hvparams=self.op.hvparams,
8907                             hypervisor=self.op.hypervisor,
8908                             osparams=self.op.osparams,
8909                             )
8910
8911     if self.op.tags:
8912       for tag in self.op.tags:
8913         iobj.AddTag(tag)
8914
8915     if self.adopt_disks:
8916       if self.op.disk_template == constants.DT_PLAIN:
8917         # rename LVs to the newly-generated names; we need to construct
8918         # 'fake' LV disks with the old data, plus the new unique_id
8919         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8920         rename_to = []
8921         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8922           rename_to.append(t_dsk.logical_id)
8923           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8924           self.cfg.SetDiskID(t_dsk, pnode_name)
8925         result = self.rpc.call_blockdev_rename(pnode_name,
8926                                                zip(tmp_disks, rename_to))
8927         result.Raise("Failed to rename adoped LVs")
8928     else:
8929       feedback_fn("* creating instance disks...")
8930       try:
8931         _CreateDisks(self, iobj)
8932       except errors.OpExecError:
8933         self.LogWarning("Device creation failed, reverting...")
8934         try:
8935           _RemoveDisks(self, iobj)
8936         finally:
8937           self.cfg.ReleaseDRBDMinors(instance)
8938           raise
8939
8940     feedback_fn("adding instance %s to cluster config" % instance)
8941
8942     self.cfg.AddInstance(iobj, self.proc.GetECId())
8943
8944     # Declare that we don't want to remove the instance lock anymore, as we've
8945     # added the instance to the config
8946     del self.remove_locks[locking.LEVEL_INSTANCE]
8947
8948     if self.op.mode == constants.INSTANCE_IMPORT:
8949       # Release unused nodes
8950       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8951     else:
8952       # Release all nodes
8953       _ReleaseLocks(self, locking.LEVEL_NODE)
8954
8955     disk_abort = False
8956     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8957       feedback_fn("* wiping instance disks...")
8958       try:
8959         _WipeDisks(self, iobj)
8960       except errors.OpExecError, err:
8961         logging.exception("Wiping disks failed")
8962         self.LogWarning("Wiping instance disks failed (%s)", err)
8963         disk_abort = True
8964
8965     if disk_abort:
8966       # Something is already wrong with the disks, don't do anything else
8967       pass
8968     elif self.op.wait_for_sync:
8969       disk_abort = not _WaitForSync(self, iobj)
8970     elif iobj.disk_template in constants.DTS_INT_MIRROR:
8971       # make sure the disks are not degraded (still sync-ing is ok)
8972       feedback_fn("* checking mirrors status")
8973       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8974     else:
8975       disk_abort = False
8976
8977     if disk_abort:
8978       _RemoveDisks(self, iobj)
8979       self.cfg.RemoveInstance(iobj.name)
8980       # Make sure the instance lock gets removed
8981       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8982       raise errors.OpExecError("There are some degraded disks for"
8983                                " this instance")
8984
8985     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8986       if self.op.mode == constants.INSTANCE_CREATE:
8987         if not self.op.no_install:
8988           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
8989                         not self.op.wait_for_sync)
8990           if pause_sync:
8991             feedback_fn("* pausing disk sync to install instance OS")
8992             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
8993                                                               iobj.disks, True)
8994             for idx, success in enumerate(result.payload):
8995               if not success:
8996                 logging.warn("pause-sync of instance %s for disk %d failed",
8997                              instance, idx)
8998
8999           feedback_fn("* running the instance OS create scripts...")
9000           # FIXME: pass debug option from opcode to backend
9001           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
9002                                                  self.op.debug_level)
9003           if pause_sync:
9004             feedback_fn("* resuming disk sync")
9005             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9006                                                               iobj.disks, False)
9007             for idx, success in enumerate(result.payload):
9008               if not success:
9009                 logging.warn("resume-sync of instance %s for disk %d failed",
9010                              instance, idx)
9011
9012           result.Raise("Could not add os for instance %s"
9013                        " on node %s" % (instance, pnode_name))
9014
9015       elif self.op.mode == constants.INSTANCE_IMPORT:
9016         feedback_fn("* running the instance OS import scripts...")
9017
9018         transfers = []
9019
9020         for idx, image in enumerate(self.src_images):
9021           if not image:
9022             continue
9023
9024           # FIXME: pass debug option from opcode to backend
9025           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9026                                              constants.IEIO_FILE, (image, ),
9027                                              constants.IEIO_SCRIPT,
9028                                              (iobj.disks[idx], idx),
9029                                              None)
9030           transfers.append(dt)
9031
9032         import_result = \
9033           masterd.instance.TransferInstanceData(self, feedback_fn,
9034                                                 self.op.src_node, pnode_name,
9035                                                 self.pnode.secondary_ip,
9036                                                 iobj, transfers)
9037         if not compat.all(import_result):
9038           self.LogWarning("Some disks for instance %s on node %s were not"
9039                           " imported successfully" % (instance, pnode_name))
9040
9041       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9042         feedback_fn("* preparing remote import...")
9043         # The source cluster will stop the instance before attempting to make a
9044         # connection. In some cases stopping an instance can take a long time,
9045         # hence the shutdown timeout is added to the connection timeout.
9046         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9047                            self.op.source_shutdown_timeout)
9048         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9049
9050         assert iobj.primary_node == self.pnode.name
9051         disk_results = \
9052           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9053                                         self.source_x509_ca,
9054                                         self._cds, timeouts)
9055         if not compat.all(disk_results):
9056           # TODO: Should the instance still be started, even if some disks
9057           # failed to import (valid for local imports, too)?
9058           self.LogWarning("Some disks for instance %s on node %s were not"
9059                           " imported successfully" % (instance, pnode_name))
9060
9061         # Run rename script on newly imported instance
9062         assert iobj.name == instance
9063         feedback_fn("Running rename script for %s" % instance)
9064         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9065                                                    self.source_instance_name,
9066                                                    self.op.debug_level)
9067         if result.fail_msg:
9068           self.LogWarning("Failed to run rename script for %s on node"
9069                           " %s: %s" % (instance, pnode_name, result.fail_msg))
9070
9071       else:
9072         # also checked in the prereq part
9073         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9074                                      % self.op.mode)
9075
9076     if self.op.start:
9077       iobj.admin_up = True
9078       self.cfg.Update(iobj, feedback_fn)
9079       logging.info("Starting instance %s on node %s", instance, pnode_name)
9080       feedback_fn("* starting instance...")
9081       result = self.rpc.call_instance_start(pnode_name, iobj,
9082                                             None, None, False)
9083       result.Raise("Could not start instance")
9084
9085     return list(iobj.all_nodes)
9086
9087
9088 class LUInstanceConsole(NoHooksLU):
9089   """Connect to an instance's console.
9090
9091   This is somewhat special in that it returns the command line that
9092   you need to run on the master node in order to connect to the
9093   console.
9094
9095   """
9096   REQ_BGL = False
9097
9098   def ExpandNames(self):
9099     self._ExpandAndLockInstance()
9100
9101   def CheckPrereq(self):
9102     """Check prerequisites.
9103
9104     This checks that the instance is in the cluster.
9105
9106     """
9107     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9108     assert self.instance is not None, \
9109       "Cannot retrieve locked instance %s" % self.op.instance_name
9110     _CheckNodeOnline(self, self.instance.primary_node)
9111
9112   def Exec(self, feedback_fn):
9113     """Connect to the console of an instance
9114
9115     """
9116     instance = self.instance
9117     node = instance.primary_node
9118
9119     node_insts = self.rpc.call_instance_list([node],
9120                                              [instance.hypervisor])[node]
9121     node_insts.Raise("Can't get node information from %s" % node)
9122
9123     if instance.name not in node_insts.payload:
9124       if instance.admin_up:
9125         state = constants.INSTST_ERRORDOWN
9126       else:
9127         state = constants.INSTST_ADMINDOWN
9128       raise errors.OpExecError("Instance %s is not running (state %s)" %
9129                                (instance.name, state))
9130
9131     logging.debug("Connecting to console of %s on %s", instance.name, node)
9132
9133     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9134
9135
9136 def _GetInstanceConsole(cluster, instance):
9137   """Returns console information for an instance.
9138
9139   @type cluster: L{objects.Cluster}
9140   @type instance: L{objects.Instance}
9141   @rtype: dict
9142
9143   """
9144   hyper = hypervisor.GetHypervisor(instance.hypervisor)
9145   # beparams and hvparams are passed separately, to avoid editing the
9146   # instance and then saving the defaults in the instance itself.
9147   hvparams = cluster.FillHV(instance)
9148   beparams = cluster.FillBE(instance)
9149   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9150
9151   assert console.instance == instance.name
9152   assert console.Validate()
9153
9154   return console.ToDict()
9155
9156
9157 class LUInstanceReplaceDisks(LogicalUnit):
9158   """Replace the disks of an instance.
9159
9160   """
9161   HPATH = "mirrors-replace"
9162   HTYPE = constants.HTYPE_INSTANCE
9163   REQ_BGL = False
9164
9165   def CheckArguments(self):
9166     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9167                                   self.op.iallocator)
9168
9169   def ExpandNames(self):
9170     self._ExpandAndLockInstance()
9171
9172     assert locking.LEVEL_NODE not in self.needed_locks
9173     assert locking.LEVEL_NODEGROUP not in self.needed_locks
9174
9175     assert self.op.iallocator is None or self.op.remote_node is None, \
9176       "Conflicting options"
9177
9178     if self.op.remote_node is not None:
9179       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9180
9181       # Warning: do not remove the locking of the new secondary here
9182       # unless DRBD8.AddChildren is changed to work in parallel;
9183       # currently it doesn't since parallel invocations of
9184       # FindUnusedMinor will conflict
9185       self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9186       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9187     else:
9188       self.needed_locks[locking.LEVEL_NODE] = []
9189       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9190
9191       if self.op.iallocator is not None:
9192         # iallocator will select a new node in the same group
9193         self.needed_locks[locking.LEVEL_NODEGROUP] = []
9194
9195     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9196                                    self.op.iallocator, self.op.remote_node,
9197                                    self.op.disks, False, self.op.early_release)
9198
9199     self.tasklets = [self.replacer]
9200
9201   def DeclareLocks(self, level):
9202     if level == locking.LEVEL_NODEGROUP:
9203       assert self.op.remote_node is None
9204       assert self.op.iallocator is not None
9205       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9206
9207       self.share_locks[locking.LEVEL_NODEGROUP] = 1
9208       self.needed_locks[locking.LEVEL_NODEGROUP] = \
9209         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9210
9211     elif level == locking.LEVEL_NODE:
9212       if self.op.iallocator is not None:
9213         assert self.op.remote_node is None
9214         assert not self.needed_locks[locking.LEVEL_NODE]
9215
9216         # Lock member nodes of all locked groups
9217         self.needed_locks[locking.LEVEL_NODE] = [node_name
9218           for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9219           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9220       else:
9221         self._LockInstancesNodes()
9222
9223   def BuildHooksEnv(self):
9224     """Build hooks env.
9225
9226     This runs on the master, the primary and all the secondaries.
9227
9228     """
9229     instance = self.replacer.instance
9230     env = {
9231       "MODE": self.op.mode,
9232       "NEW_SECONDARY": self.op.remote_node,
9233       "OLD_SECONDARY": instance.secondary_nodes[0],
9234       }
9235     env.update(_BuildInstanceHookEnvByObject(self, instance))
9236     return env
9237
9238   def BuildHooksNodes(self):
9239     """Build hooks nodes.
9240
9241     """
9242     instance = self.replacer.instance
9243     nl = [
9244       self.cfg.GetMasterNode(),
9245       instance.primary_node,
9246       ]
9247     if self.op.remote_node is not None:
9248       nl.append(self.op.remote_node)
9249     return nl, nl
9250
9251   def CheckPrereq(self):
9252     """Check prerequisites.
9253
9254     """
9255     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9256             self.op.iallocator is None)
9257
9258     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9259     if owned_groups:
9260       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9261
9262     return LogicalUnit.CheckPrereq(self)
9263
9264
9265 class TLReplaceDisks(Tasklet):
9266   """Replaces disks for an instance.
9267
9268   Note: Locking is not within the scope of this class.
9269
9270   """
9271   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9272                disks, delay_iallocator, early_release):
9273     """Initializes this class.
9274
9275     """
9276     Tasklet.__init__(self, lu)
9277
9278     # Parameters
9279     self.instance_name = instance_name
9280     self.mode = mode
9281     self.iallocator_name = iallocator_name
9282     self.remote_node = remote_node
9283     self.disks = disks
9284     self.delay_iallocator = delay_iallocator
9285     self.early_release = early_release
9286
9287     # Runtime data
9288     self.instance = None
9289     self.new_node = None
9290     self.target_node = None
9291     self.other_node = None
9292     self.remote_node_info = None
9293     self.node_secondary_ip = None
9294
9295   @staticmethod
9296   def CheckArguments(mode, remote_node, iallocator):
9297     """Helper function for users of this class.
9298
9299     """
9300     # check for valid parameter combination
9301     if mode == constants.REPLACE_DISK_CHG:
9302       if remote_node is None and iallocator is None:
9303         raise errors.OpPrereqError("When changing the secondary either an"
9304                                    " iallocator script must be used or the"
9305                                    " new node given", errors.ECODE_INVAL)
9306
9307       if remote_node is not None and iallocator is not None:
9308         raise errors.OpPrereqError("Give either the iallocator or the new"
9309                                    " secondary, not both", errors.ECODE_INVAL)
9310
9311     elif remote_node is not None or iallocator is not None:
9312       # Not replacing the secondary
9313       raise errors.OpPrereqError("The iallocator and new node options can"
9314                                  " only be used when changing the"
9315                                  " secondary node", errors.ECODE_INVAL)
9316
9317   @staticmethod
9318   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9319     """Compute a new secondary node using an IAllocator.
9320
9321     """
9322     ial = IAllocator(lu.cfg, lu.rpc,
9323                      mode=constants.IALLOCATOR_MODE_RELOC,
9324                      name=instance_name,
9325                      relocate_from=list(relocate_from))
9326
9327     ial.Run(iallocator_name)
9328
9329     if not ial.success:
9330       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9331                                  " %s" % (iallocator_name, ial.info),
9332                                  errors.ECODE_NORES)
9333
9334     if len(ial.result) != ial.required_nodes:
9335       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9336                                  " of nodes (%s), required %s" %
9337                                  (iallocator_name,
9338                                   len(ial.result), ial.required_nodes),
9339                                  errors.ECODE_FAULT)
9340
9341     remote_node_name = ial.result[0]
9342
9343     lu.LogInfo("Selected new secondary for instance '%s': %s",
9344                instance_name, remote_node_name)
9345
9346     return remote_node_name
9347
9348   def _FindFaultyDisks(self, node_name):
9349     """Wrapper for L{_FindFaultyInstanceDisks}.
9350
9351     """
9352     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9353                                     node_name, True)
9354
9355   def _CheckDisksActivated(self, instance):
9356     """Checks if the instance disks are activated.
9357
9358     @param instance: The instance to check disks
9359     @return: True if they are activated, False otherwise
9360
9361     """
9362     nodes = instance.all_nodes
9363
9364     for idx, dev in enumerate(instance.disks):
9365       for node in nodes:
9366         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9367         self.cfg.SetDiskID(dev, node)
9368
9369         result = self.rpc.call_blockdev_find(node, dev)
9370
9371         if result.offline:
9372           continue
9373         elif result.fail_msg or not result.payload:
9374           return False
9375
9376     return True
9377
9378   def CheckPrereq(self):
9379     """Check prerequisites.
9380
9381     This checks that the instance is in the cluster.
9382
9383     """
9384     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9385     assert instance is not None, \
9386       "Cannot retrieve locked instance %s" % self.instance_name
9387
9388     if instance.disk_template != constants.DT_DRBD8:
9389       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9390                                  " instances", errors.ECODE_INVAL)
9391
9392     if len(instance.secondary_nodes) != 1:
9393       raise errors.OpPrereqError("The instance has a strange layout,"
9394                                  " expected one secondary but found %d" %
9395                                  len(instance.secondary_nodes),
9396                                  errors.ECODE_FAULT)
9397
9398     if not self.delay_iallocator:
9399       self._CheckPrereq2()
9400
9401   def _CheckPrereq2(self):
9402     """Check prerequisites, second part.
9403
9404     This function should always be part of CheckPrereq. It was separated and is
9405     now called from Exec because during node evacuation iallocator was only
9406     called with an unmodified cluster model, not taking planned changes into
9407     account.
9408
9409     """
9410     instance = self.instance
9411     secondary_node = instance.secondary_nodes[0]
9412
9413     if self.iallocator_name is None:
9414       remote_node = self.remote_node
9415     else:
9416       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9417                                        instance.name, instance.secondary_nodes)
9418
9419     if remote_node is None:
9420       self.remote_node_info = None
9421     else:
9422       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9423              "Remote node '%s' is not locked" % remote_node
9424
9425       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9426       assert self.remote_node_info is not None, \
9427         "Cannot retrieve locked node %s" % remote_node
9428
9429     if remote_node == self.instance.primary_node:
9430       raise errors.OpPrereqError("The specified node is the primary node of"
9431                                  " the instance", errors.ECODE_INVAL)
9432
9433     if remote_node == secondary_node:
9434       raise errors.OpPrereqError("The specified node is already the"
9435                                  " secondary node of the instance",
9436                                  errors.ECODE_INVAL)
9437
9438     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9439                                     constants.REPLACE_DISK_CHG):
9440       raise errors.OpPrereqError("Cannot specify disks to be replaced",
9441                                  errors.ECODE_INVAL)
9442
9443     if self.mode == constants.REPLACE_DISK_AUTO:
9444       if not self._CheckDisksActivated(instance):
9445         raise errors.OpPrereqError("Please run activate-disks on instance %s"
9446                                    " first" % self.instance_name,
9447                                    errors.ECODE_STATE)
9448       faulty_primary = self._FindFaultyDisks(instance.primary_node)
9449       faulty_secondary = self._FindFaultyDisks(secondary_node)
9450
9451       if faulty_primary and faulty_secondary:
9452         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9453                                    " one node and can not be repaired"
9454                                    " automatically" % self.instance_name,
9455                                    errors.ECODE_STATE)
9456
9457       if faulty_primary:
9458         self.disks = faulty_primary
9459         self.target_node = instance.primary_node
9460         self.other_node = secondary_node
9461         check_nodes = [self.target_node, self.other_node]
9462       elif faulty_secondary:
9463         self.disks = faulty_secondary
9464         self.target_node = secondary_node
9465         self.other_node = instance.primary_node
9466         check_nodes = [self.target_node, self.other_node]
9467       else:
9468         self.disks = []
9469         check_nodes = []
9470
9471     else:
9472       # Non-automatic modes
9473       if self.mode == constants.REPLACE_DISK_PRI:
9474         self.target_node = instance.primary_node
9475         self.other_node = secondary_node
9476         check_nodes = [self.target_node, self.other_node]
9477
9478       elif self.mode == constants.REPLACE_DISK_SEC:
9479         self.target_node = secondary_node
9480         self.other_node = instance.primary_node
9481         check_nodes = [self.target_node, self.other_node]
9482
9483       elif self.mode == constants.REPLACE_DISK_CHG:
9484         self.new_node = remote_node
9485         self.other_node = instance.primary_node
9486         self.target_node = secondary_node
9487         check_nodes = [self.new_node, self.other_node]
9488
9489         _CheckNodeNotDrained(self.lu, remote_node)
9490         _CheckNodeVmCapable(self.lu, remote_node)
9491
9492         old_node_info = self.cfg.GetNodeInfo(secondary_node)
9493         assert old_node_info is not None
9494         if old_node_info.offline and not self.early_release:
9495           # doesn't make sense to delay the release
9496           self.early_release = True
9497           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9498                           " early-release mode", secondary_node)
9499
9500       else:
9501         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9502                                      self.mode)
9503
9504       # If not specified all disks should be replaced
9505       if not self.disks:
9506         self.disks = range(len(self.instance.disks))
9507
9508     for node in check_nodes:
9509       _CheckNodeOnline(self.lu, node)
9510
9511     touched_nodes = frozenset(node_name for node_name in [self.new_node,
9512                                                           self.other_node,
9513                                                           self.target_node]
9514                               if node_name is not None)
9515
9516     # Release unneeded node locks
9517     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9518
9519     # Release any owned node group
9520     if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9521       _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9522
9523     # Check whether disks are valid
9524     for disk_idx in self.disks:
9525       instance.FindDisk(disk_idx)
9526
9527     # Get secondary node IP addresses
9528     self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9529                                   in self.cfg.GetMultiNodeInfo(touched_nodes))
9530
9531   def Exec(self, feedback_fn):
9532     """Execute disk replacement.
9533
9534     This dispatches the disk replacement to the appropriate handler.
9535
9536     """
9537     if self.delay_iallocator:
9538       self._CheckPrereq2()
9539
9540     if __debug__:
9541       # Verify owned locks before starting operation
9542       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9543       assert set(owned_nodes) == set(self.node_secondary_ip), \
9544           ("Incorrect node locks, owning %s, expected %s" %
9545            (owned_nodes, self.node_secondary_ip.keys()))
9546
9547       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9548       assert list(owned_instances) == [self.instance_name], \
9549           "Instance '%s' not locked" % self.instance_name
9550
9551       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9552           "Should not own any node group lock at this point"
9553
9554     if not self.disks:
9555       feedback_fn("No disks need replacement")
9556       return
9557
9558     feedback_fn("Replacing disk(s) %s for %s" %
9559                 (utils.CommaJoin(self.disks), self.instance.name))
9560
9561     activate_disks = (not self.instance.admin_up)
9562
9563     # Activate the instance disks if we're replacing them on a down instance
9564     if activate_disks:
9565       _StartInstanceDisks(self.lu, self.instance, True)
9566
9567     try:
9568       # Should we replace the secondary node?
9569       if self.new_node is not None:
9570         fn = self._ExecDrbd8Secondary
9571       else:
9572         fn = self._ExecDrbd8DiskOnly
9573
9574       result = fn(feedback_fn)
9575     finally:
9576       # Deactivate the instance disks if we're replacing them on a
9577       # down instance
9578       if activate_disks:
9579         _SafeShutdownInstanceDisks(self.lu, self.instance)
9580
9581     if __debug__:
9582       # Verify owned locks
9583       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9584       nodes = frozenset(self.node_secondary_ip)
9585       assert ((self.early_release and not owned_nodes) or
9586               (not self.early_release and not (set(owned_nodes) - nodes))), \
9587         ("Not owning the correct locks, early_release=%s, owned=%r,"
9588          " nodes=%r" % (self.early_release, owned_nodes, nodes))
9589
9590     return result
9591
9592   def _CheckVolumeGroup(self, nodes):
9593     self.lu.LogInfo("Checking volume groups")
9594
9595     vgname = self.cfg.GetVGName()
9596
9597     # Make sure volume group exists on all involved nodes
9598     results = self.rpc.call_vg_list(nodes)
9599     if not results:
9600       raise errors.OpExecError("Can't list volume groups on the nodes")
9601
9602     for node in nodes:
9603       res = results[node]
9604       res.Raise("Error checking node %s" % node)
9605       if vgname not in res.payload:
9606         raise errors.OpExecError("Volume group '%s' not found on node %s" %
9607                                  (vgname, node))
9608
9609   def _CheckDisksExistence(self, nodes):
9610     # Check disk existence
9611     for idx, dev in enumerate(self.instance.disks):
9612       if idx not in self.disks:
9613         continue
9614
9615       for node in nodes:
9616         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9617         self.cfg.SetDiskID(dev, node)
9618
9619         result = self.rpc.call_blockdev_find(node, dev)
9620
9621         msg = result.fail_msg
9622         if msg or not result.payload:
9623           if not msg:
9624             msg = "disk not found"
9625           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9626                                    (idx, node, msg))
9627
9628   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9629     for idx, dev in enumerate(self.instance.disks):
9630       if idx not in self.disks:
9631         continue
9632
9633       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9634                       (idx, node_name))
9635
9636       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9637                                    ldisk=ldisk):
9638         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9639                                  " replace disks for instance %s" %
9640                                  (node_name, self.instance.name))
9641
9642   def _CreateNewStorage(self, node_name):
9643     """Create new storage on the primary or secondary node.
9644
9645     This is only used for same-node replaces, not for changing the
9646     secondary node, hence we don't want to modify the existing disk.
9647
9648     """
9649     iv_names = {}
9650
9651     for idx, dev in enumerate(self.instance.disks):
9652       if idx not in self.disks:
9653         continue
9654
9655       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9656
9657       self.cfg.SetDiskID(dev, node_name)
9658
9659       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9660       names = _GenerateUniqueNames(self.lu, lv_names)
9661
9662       vg_data = dev.children[0].logical_id[0]
9663       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9664                              logical_id=(vg_data, names[0]))
9665       vg_meta = dev.children[1].logical_id[0]
9666       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9667                              logical_id=(vg_meta, names[1]))
9668
9669       new_lvs = [lv_data, lv_meta]
9670       old_lvs = [child.Copy() for child in dev.children]
9671       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9672
9673       # we pass force_create=True to force the LVM creation
9674       for new_lv in new_lvs:
9675         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9676                         _GetInstanceInfoText(self.instance), False)
9677
9678     return iv_names
9679
9680   def _CheckDevices(self, node_name, iv_names):
9681     for name, (dev, _, _) in iv_names.iteritems():
9682       self.cfg.SetDiskID(dev, node_name)
9683
9684       result = self.rpc.call_blockdev_find(node_name, dev)
9685
9686       msg = result.fail_msg
9687       if msg or not result.payload:
9688         if not msg:
9689           msg = "disk not found"
9690         raise errors.OpExecError("Can't find DRBD device %s: %s" %
9691                                  (name, msg))
9692
9693       if result.payload.is_degraded:
9694         raise errors.OpExecError("DRBD device %s is degraded!" % name)
9695
9696   def _RemoveOldStorage(self, node_name, iv_names):
9697     for name, (_, old_lvs, _) in iv_names.iteritems():
9698       self.lu.LogInfo("Remove logical volumes for %s" % name)
9699
9700       for lv in old_lvs:
9701         self.cfg.SetDiskID(lv, node_name)
9702
9703         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9704         if msg:
9705           self.lu.LogWarning("Can't remove old LV: %s" % msg,
9706                              hint="remove unused LVs manually")
9707
9708   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9709     """Replace a disk on the primary or secondary for DRBD 8.
9710
9711     The algorithm for replace is quite complicated:
9712
9713       1. for each disk to be replaced:
9714
9715         1. create new LVs on the target node with unique names
9716         1. detach old LVs from the drbd device
9717         1. rename old LVs to name_replaced.<time_t>
9718         1. rename new LVs to old LVs
9719         1. attach the new LVs (with the old names now) to the drbd device
9720
9721       1. wait for sync across all devices
9722
9723       1. for each modified disk:
9724
9725         1. remove old LVs (which have the name name_replaces.<time_t>)
9726
9727     Failures are not very well handled.
9728
9729     """
9730     steps_total = 6
9731
9732     # Step: check device activation
9733     self.lu.LogStep(1, steps_total, "Check device existence")
9734     self._CheckDisksExistence([self.other_node, self.target_node])
9735     self._CheckVolumeGroup([self.target_node, self.other_node])
9736
9737     # Step: check other node consistency
9738     self.lu.LogStep(2, steps_total, "Check peer consistency")
9739     self._CheckDisksConsistency(self.other_node,
9740                                 self.other_node == self.instance.primary_node,
9741                                 False)
9742
9743     # Step: create new storage
9744     self.lu.LogStep(3, steps_total, "Allocate new storage")
9745     iv_names = self._CreateNewStorage(self.target_node)
9746
9747     # Step: for each lv, detach+rename*2+attach
9748     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9749     for dev, old_lvs, new_lvs in iv_names.itervalues():
9750       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9751
9752       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9753                                                      old_lvs)
9754       result.Raise("Can't detach drbd from local storage on node"
9755                    " %s for device %s" % (self.target_node, dev.iv_name))
9756       #dev.children = []
9757       #cfg.Update(instance)
9758
9759       # ok, we created the new LVs, so now we know we have the needed
9760       # storage; as such, we proceed on the target node to rename
9761       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9762       # using the assumption that logical_id == physical_id (which in
9763       # turn is the unique_id on that node)
9764
9765       # FIXME(iustin): use a better name for the replaced LVs
9766       temp_suffix = int(time.time())
9767       ren_fn = lambda d, suff: (d.physical_id[0],
9768                                 d.physical_id[1] + "_replaced-%s" % suff)
9769
9770       # Build the rename list based on what LVs exist on the node
9771       rename_old_to_new = []
9772       for to_ren in old_lvs:
9773         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9774         if not result.fail_msg and result.payload:
9775           # device exists
9776           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9777
9778       self.lu.LogInfo("Renaming the old LVs on the target node")
9779       result = self.rpc.call_blockdev_rename(self.target_node,
9780                                              rename_old_to_new)
9781       result.Raise("Can't rename old LVs on node %s" % self.target_node)
9782
9783       # Now we rename the new LVs to the old LVs
9784       self.lu.LogInfo("Renaming the new LVs on the target node")
9785       rename_new_to_old = [(new, old.physical_id)
9786                            for old, new in zip(old_lvs, new_lvs)]
9787       result = self.rpc.call_blockdev_rename(self.target_node,
9788                                              rename_new_to_old)
9789       result.Raise("Can't rename new LVs on node %s" % self.target_node)
9790
9791       # Intermediate steps of in memory modifications
9792       for old, new in zip(old_lvs, new_lvs):
9793         new.logical_id = old.logical_id
9794         self.cfg.SetDiskID(new, self.target_node)
9795
9796       # We need to modify old_lvs so that removal later removes the
9797       # right LVs, not the newly added ones; note that old_lvs is a
9798       # copy here
9799       for disk in old_lvs:
9800         disk.logical_id = ren_fn(disk, temp_suffix)
9801         self.cfg.SetDiskID(disk, self.target_node)
9802
9803       # Now that the new lvs have the old name, we can add them to the device
9804       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9805       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9806                                                   new_lvs)
9807       msg = result.fail_msg
9808       if msg:
9809         for new_lv in new_lvs:
9810           msg2 = self.rpc.call_blockdev_remove(self.target_node,
9811                                                new_lv).fail_msg
9812           if msg2:
9813             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9814                                hint=("cleanup manually the unused logical"
9815                                      "volumes"))
9816         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9817
9818     cstep = 5
9819     if self.early_release:
9820       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9821       cstep += 1
9822       self._RemoveOldStorage(self.target_node, iv_names)
9823       # WARNING: we release both node locks here, do not do other RPCs
9824       # than WaitForSync to the primary node
9825       _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9826                     names=[self.target_node, self.other_node])
9827
9828     # Wait for sync
9829     # This can fail as the old devices are degraded and _WaitForSync
9830     # does a combined result over all disks, so we don't check its return value
9831     self.lu.LogStep(cstep, steps_total, "Sync devices")
9832     cstep += 1
9833     _WaitForSync(self.lu, self.instance)
9834
9835     # Check all devices manually
9836     self._CheckDevices(self.instance.primary_node, iv_names)
9837
9838     # Step: remove old storage
9839     if not self.early_release:
9840       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9841       cstep += 1
9842       self._RemoveOldStorage(self.target_node, iv_names)
9843
9844   def _ExecDrbd8Secondary(self, feedback_fn):
9845     """Replace the secondary node for DRBD 8.
9846
9847     The algorithm for replace is quite complicated:
9848       - for all disks of the instance:
9849         - create new LVs on the new node with same names
9850         - shutdown the drbd device on the old secondary
9851         - disconnect the drbd network on the primary
9852         - create the drbd device on the new secondary
9853         - network attach the drbd on the primary, using an artifice:
9854           the drbd code for Attach() will connect to the network if it
9855           finds a device which is connected to the good local disks but
9856           not network enabled
9857       - wait for sync across all devices
9858       - remove all disks from the old secondary
9859
9860     Failures are not very well handled.
9861
9862     """
9863     steps_total = 6
9864
9865     pnode = self.instance.primary_node
9866
9867     # Step: check device activation
9868     self.lu.LogStep(1, steps_total, "Check device existence")
9869     self._CheckDisksExistence([self.instance.primary_node])
9870     self._CheckVolumeGroup([self.instance.primary_node])
9871
9872     # Step: check other node consistency
9873     self.lu.LogStep(2, steps_total, "Check peer consistency")
9874     self._CheckDisksConsistency(self.instance.primary_node, True, True)
9875
9876     # Step: create new storage
9877     self.lu.LogStep(3, steps_total, "Allocate new storage")
9878     for idx, dev in enumerate(self.instance.disks):
9879       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9880                       (self.new_node, idx))
9881       # we pass force_create=True to force LVM creation
9882       for new_lv in dev.children:
9883         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9884                         _GetInstanceInfoText(self.instance), False)
9885
9886     # Step 4: dbrd minors and drbd setups changes
9887     # after this, we must manually remove the drbd minors on both the
9888     # error and the success paths
9889     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9890     minors = self.cfg.AllocateDRBDMinor([self.new_node
9891                                          for dev in self.instance.disks],
9892                                         self.instance.name)
9893     logging.debug("Allocated minors %r", minors)
9894
9895     iv_names = {}
9896     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9897       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9898                       (self.new_node, idx))
9899       # create new devices on new_node; note that we create two IDs:
9900       # one without port, so the drbd will be activated without
9901       # networking information on the new node at this stage, and one
9902       # with network, for the latter activation in step 4
9903       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9904       if self.instance.primary_node == o_node1:
9905         p_minor = o_minor1
9906       else:
9907         assert self.instance.primary_node == o_node2, "Three-node instance?"
9908         p_minor = o_minor2
9909
9910       new_alone_id = (self.instance.primary_node, self.new_node, None,
9911                       p_minor, new_minor, o_secret)
9912       new_net_id = (self.instance.primary_node, self.new_node, o_port,
9913                     p_minor, new_minor, o_secret)
9914
9915       iv_names[idx] = (dev, dev.children, new_net_id)
9916       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9917                     new_net_id)
9918       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9919                               logical_id=new_alone_id,
9920                               children=dev.children,
9921                               size=dev.size)
9922       try:
9923         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9924                               _GetInstanceInfoText(self.instance), False)
9925       except errors.GenericError:
9926         self.cfg.ReleaseDRBDMinors(self.instance.name)
9927         raise
9928
9929     # We have new devices, shutdown the drbd on the old secondary
9930     for idx, dev in enumerate(self.instance.disks):
9931       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9932       self.cfg.SetDiskID(dev, self.target_node)
9933       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9934       if msg:
9935         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9936                            "node: %s" % (idx, msg),
9937                            hint=("Please cleanup this device manually as"
9938                                  " soon as possible"))
9939
9940     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9941     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
9942                                                self.instance.disks)[pnode]
9943
9944     msg = result.fail_msg
9945     if msg:
9946       # detaches didn't succeed (unlikely)
9947       self.cfg.ReleaseDRBDMinors(self.instance.name)
9948       raise errors.OpExecError("Can't detach the disks from the network on"
9949                                " old node: %s" % (msg,))
9950
9951     # if we managed to detach at least one, we update all the disks of
9952     # the instance to point to the new secondary
9953     self.lu.LogInfo("Updating instance configuration")
9954     for dev, _, new_logical_id in iv_names.itervalues():
9955       dev.logical_id = new_logical_id
9956       self.cfg.SetDiskID(dev, self.instance.primary_node)
9957
9958     self.cfg.Update(self.instance, feedback_fn)
9959
9960     # and now perform the drbd attach
9961     self.lu.LogInfo("Attaching primary drbds to new secondary"
9962                     " (standalone => connected)")
9963     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9964                                             self.new_node],
9965                                            self.node_secondary_ip,
9966                                            self.instance.disks,
9967                                            self.instance.name,
9968                                            False)
9969     for to_node, to_result in result.items():
9970       msg = to_result.fail_msg
9971       if msg:
9972         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9973                            to_node, msg,
9974                            hint=("please do a gnt-instance info to see the"
9975                                  " status of disks"))
9976     cstep = 5
9977     if self.early_release:
9978       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9979       cstep += 1
9980       self._RemoveOldStorage(self.target_node, iv_names)
9981       # WARNING: we release all node locks here, do not do other RPCs
9982       # than WaitForSync to the primary node
9983       _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9984                     names=[self.instance.primary_node,
9985                            self.target_node,
9986                            self.new_node])
9987
9988     # Wait for sync
9989     # This can fail as the old devices are degraded and _WaitForSync
9990     # does a combined result over all disks, so we don't check its return value
9991     self.lu.LogStep(cstep, steps_total, "Sync devices")
9992     cstep += 1
9993     _WaitForSync(self.lu, self.instance)
9994
9995     # Check all devices manually
9996     self._CheckDevices(self.instance.primary_node, iv_names)
9997
9998     # Step: remove old storage
9999     if not self.early_release:
10000       self.lu.LogStep(cstep, steps_total, "Removing old storage")
10001       self._RemoveOldStorage(self.target_node, iv_names)
10002
10003
10004 class LURepairNodeStorage(NoHooksLU):
10005   """Repairs the volume group on a node.
10006
10007   """
10008   REQ_BGL = False
10009
10010   def CheckArguments(self):
10011     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10012
10013     storage_type = self.op.storage_type
10014
10015     if (constants.SO_FIX_CONSISTENCY not in
10016         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10017       raise errors.OpPrereqError("Storage units of type '%s' can not be"
10018                                  " repaired" % storage_type,
10019                                  errors.ECODE_INVAL)
10020
10021   def ExpandNames(self):
10022     self.needed_locks = {
10023       locking.LEVEL_NODE: [self.op.node_name],
10024       }
10025
10026   def _CheckFaultyDisks(self, instance, node_name):
10027     """Ensure faulty disks abort the opcode or at least warn."""
10028     try:
10029       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10030                                   node_name, True):
10031         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10032                                    " node '%s'" % (instance.name, node_name),
10033                                    errors.ECODE_STATE)
10034     except errors.OpPrereqError, err:
10035       if self.op.ignore_consistency:
10036         self.proc.LogWarning(str(err.args[0]))
10037       else:
10038         raise
10039
10040   def CheckPrereq(self):
10041     """Check prerequisites.
10042
10043     """
10044     # Check whether any instance on this node has faulty disks
10045     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10046       if not inst.admin_up:
10047         continue
10048       check_nodes = set(inst.all_nodes)
10049       check_nodes.discard(self.op.node_name)
10050       for inst_node_name in check_nodes:
10051         self._CheckFaultyDisks(inst, inst_node_name)
10052
10053   def Exec(self, feedback_fn):
10054     feedback_fn("Repairing storage unit '%s' on %s ..." %
10055                 (self.op.name, self.op.node_name))
10056
10057     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10058     result = self.rpc.call_storage_execute(self.op.node_name,
10059                                            self.op.storage_type, st_args,
10060                                            self.op.name,
10061                                            constants.SO_FIX_CONSISTENCY)
10062     result.Raise("Failed to repair storage unit '%s' on %s" %
10063                  (self.op.name, self.op.node_name))
10064
10065
10066 class LUNodeEvacuate(NoHooksLU):
10067   """Evacuates instances off a list of nodes.
10068
10069   """
10070   REQ_BGL = False
10071
10072   def CheckArguments(self):
10073     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10074
10075   def ExpandNames(self):
10076     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10077
10078     if self.op.remote_node is not None:
10079       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10080       assert self.op.remote_node
10081
10082       if self.op.remote_node == self.op.node_name:
10083         raise errors.OpPrereqError("Can not use evacuated node as a new"
10084                                    " secondary node", errors.ECODE_INVAL)
10085
10086       if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10087         raise errors.OpPrereqError("Without the use of an iallocator only"
10088                                    " secondary instances can be evacuated",
10089                                    errors.ECODE_INVAL)
10090
10091     # Declare locks
10092     self.share_locks = _ShareAll()
10093     self.needed_locks = {
10094       locking.LEVEL_INSTANCE: [],
10095       locking.LEVEL_NODEGROUP: [],
10096       locking.LEVEL_NODE: [],
10097       }
10098
10099     if self.op.remote_node is None:
10100       # Iallocator will choose any node(s) in the same group
10101       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10102     else:
10103       group_nodes = frozenset([self.op.remote_node])
10104
10105     # Determine nodes to be locked
10106     self.lock_nodes = set([self.op.node_name]) | group_nodes
10107
10108   def _DetermineInstances(self):
10109     """Builds list of instances to operate on.
10110
10111     """
10112     assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10113
10114     if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10115       # Primary instances only
10116       inst_fn = _GetNodePrimaryInstances
10117       assert self.op.remote_node is None, \
10118         "Evacuating primary instances requires iallocator"
10119     elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10120       # Secondary instances only
10121       inst_fn = _GetNodeSecondaryInstances
10122     else:
10123       # All instances
10124       assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10125       inst_fn = _GetNodeInstances
10126
10127     return inst_fn(self.cfg, self.op.node_name)
10128
10129   def DeclareLocks(self, level):
10130     if level == locking.LEVEL_INSTANCE:
10131       # Lock instances optimistically, needs verification once node and group
10132       # locks have been acquired
10133       self.needed_locks[locking.LEVEL_INSTANCE] = \
10134         set(i.name for i in self._DetermineInstances())
10135
10136     elif level == locking.LEVEL_NODEGROUP:
10137       # Lock node groups optimistically, needs verification once nodes have
10138       # been acquired
10139       self.needed_locks[locking.LEVEL_NODEGROUP] = \
10140         self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10141
10142     elif level == locking.LEVEL_NODE:
10143       self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10144
10145   def CheckPrereq(self):
10146     # Verify locks
10147     owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10148     owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10149     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10150
10151     assert owned_nodes == self.lock_nodes
10152
10153     wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10154     if owned_groups != wanted_groups:
10155       raise errors.OpExecError("Node groups changed since locks were acquired,"
10156                                " current groups are '%s', used to be '%s'" %
10157                                (utils.CommaJoin(wanted_groups),
10158                                 utils.CommaJoin(owned_groups)))
10159
10160     # Determine affected instances
10161     self.instances = self._DetermineInstances()
10162     self.instance_names = [i.name for i in self.instances]
10163
10164     if set(self.instance_names) != owned_instances:
10165       raise errors.OpExecError("Instances on node '%s' changed since locks"
10166                                " were acquired, current instances are '%s',"
10167                                " used to be '%s'" %
10168                                (self.op.node_name,
10169                                 utils.CommaJoin(self.instance_names),
10170                                 utils.CommaJoin(owned_instances)))
10171
10172     if self.instance_names:
10173       self.LogInfo("Evacuating instances from node '%s': %s",
10174                    self.op.node_name,
10175                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
10176     else:
10177       self.LogInfo("No instances to evacuate from node '%s'",
10178                    self.op.node_name)
10179
10180     if self.op.remote_node is not None:
10181       for i in self.instances:
10182         if i.primary_node == self.op.remote_node:
10183           raise errors.OpPrereqError("Node %s is the primary node of"
10184                                      " instance %s, cannot use it as"
10185                                      " secondary" %
10186                                      (self.op.remote_node, i.name),
10187                                      errors.ECODE_INVAL)
10188
10189   def Exec(self, feedback_fn):
10190     assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10191
10192     if not self.instance_names:
10193       # No instances to evacuate
10194       jobs = []
10195
10196     elif self.op.iallocator is not None:
10197       # TODO: Implement relocation to other group
10198       ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10199                        evac_mode=self.op.mode,
10200                        instances=list(self.instance_names))
10201
10202       ial.Run(self.op.iallocator)
10203
10204       if not ial.success:
10205         raise errors.OpPrereqError("Can't compute node evacuation using"
10206                                    " iallocator '%s': %s" %
10207                                    (self.op.iallocator, ial.info),
10208                                    errors.ECODE_NORES)
10209
10210       jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10211
10212     elif self.op.remote_node is not None:
10213       assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10214       jobs = [
10215         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10216                                         remote_node=self.op.remote_node,
10217                                         disks=[],
10218                                         mode=constants.REPLACE_DISK_CHG,
10219                                         early_release=self.op.early_release)]
10220         for instance_name in self.instance_names
10221         ]
10222
10223     else:
10224       raise errors.ProgrammerError("No iallocator or remote node")
10225
10226     return ResultWithJobs(jobs)
10227
10228
10229 def _SetOpEarlyRelease(early_release, op):
10230   """Sets C{early_release} flag on opcodes if available.
10231
10232   """
10233   try:
10234     op.early_release = early_release
10235   except AttributeError:
10236     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10237
10238   return op
10239
10240
10241 def _NodeEvacDest(use_nodes, group, nodes):
10242   """Returns group or nodes depending on caller's choice.
10243
10244   """
10245   if use_nodes:
10246     return utils.CommaJoin(nodes)
10247   else:
10248     return group
10249
10250
10251 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10252   """Unpacks the result of change-group and node-evacuate iallocator requests.
10253
10254   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10255   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10256
10257   @type lu: L{LogicalUnit}
10258   @param lu: Logical unit instance
10259   @type alloc_result: tuple/list
10260   @param alloc_result: Result from iallocator
10261   @type early_release: bool
10262   @param early_release: Whether to release locks early if possible
10263   @type use_nodes: bool
10264   @param use_nodes: Whether to display node names instead of groups
10265
10266   """
10267   (moved, failed, jobs) = alloc_result
10268
10269   if failed:
10270     lu.LogWarning("Unable to evacuate instances %s",
10271                   utils.CommaJoin("%s (%s)" % (name, reason)
10272                                   for (name, reason) in failed))
10273
10274   if moved:
10275     lu.LogInfo("Instances to be moved: %s",
10276                utils.CommaJoin("%s (to %s)" %
10277                                (name, _NodeEvacDest(use_nodes, group, nodes))
10278                                for (name, group, nodes) in moved))
10279
10280   return [map(compat.partial(_SetOpEarlyRelease, early_release),
10281               map(opcodes.OpCode.LoadOpCode, ops))
10282           for ops in jobs]
10283
10284
10285 class LUInstanceGrowDisk(LogicalUnit):
10286   """Grow a disk of an instance.
10287
10288   """
10289   HPATH = "disk-grow"
10290   HTYPE = constants.HTYPE_INSTANCE
10291   REQ_BGL = False
10292
10293   def ExpandNames(self):
10294     self._ExpandAndLockInstance()
10295     self.needed_locks[locking.LEVEL_NODE] = []
10296     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10297
10298   def DeclareLocks(self, level):
10299     if level == locking.LEVEL_NODE:
10300       self._LockInstancesNodes()
10301
10302   def BuildHooksEnv(self):
10303     """Build hooks env.
10304
10305     This runs on the master, the primary and all the secondaries.
10306
10307     """
10308     env = {
10309       "DISK": self.op.disk,
10310       "AMOUNT": self.op.amount,
10311       }
10312     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10313     return env
10314
10315   def BuildHooksNodes(self):
10316     """Build hooks nodes.
10317
10318     """
10319     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10320     return (nl, nl)
10321
10322   def CheckPrereq(self):
10323     """Check prerequisites.
10324
10325     This checks that the instance is in the cluster.
10326
10327     """
10328     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10329     assert instance is not None, \
10330       "Cannot retrieve locked instance %s" % self.op.instance_name
10331     nodenames = list(instance.all_nodes)
10332     for node in nodenames:
10333       _CheckNodeOnline(self, node)
10334
10335     self.instance = instance
10336
10337     if instance.disk_template not in constants.DTS_GROWABLE:
10338       raise errors.OpPrereqError("Instance's disk layout does not support"
10339                                  " growing", errors.ECODE_INVAL)
10340
10341     self.disk = instance.FindDisk(self.op.disk)
10342
10343     if instance.disk_template not in (constants.DT_FILE,
10344                                       constants.DT_SHARED_FILE):
10345       # TODO: check the free disk space for file, when that feature will be
10346       # supported
10347       _CheckNodesFreeDiskPerVG(self, nodenames,
10348                                self.disk.ComputeGrowth(self.op.amount))
10349
10350   def Exec(self, feedback_fn):
10351     """Execute disk grow.
10352
10353     """
10354     instance = self.instance
10355     disk = self.disk
10356
10357     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10358     if not disks_ok:
10359       raise errors.OpExecError("Cannot activate block device to grow")
10360
10361     # First run all grow ops in dry-run mode
10362     for node in instance.all_nodes:
10363       self.cfg.SetDiskID(disk, node)
10364       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10365       result.Raise("Grow request failed to node %s" % node)
10366
10367     # We know that (as far as we can test) operations across different
10368     # nodes will succeed, time to run it for real
10369     for node in instance.all_nodes:
10370       self.cfg.SetDiskID(disk, node)
10371       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10372       result.Raise("Grow request failed to node %s" % node)
10373
10374       # TODO: Rewrite code to work properly
10375       # DRBD goes into sync mode for a short amount of time after executing the
10376       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10377       # calling "resize" in sync mode fails. Sleeping for a short amount of
10378       # time is a work-around.
10379       time.sleep(5)
10380
10381     disk.RecordGrow(self.op.amount)
10382     self.cfg.Update(instance, feedback_fn)
10383     if self.op.wait_for_sync:
10384       disk_abort = not _WaitForSync(self, instance, disks=[disk])
10385       if disk_abort:
10386         self.proc.LogWarning("Disk sync-ing has not returned a good"
10387                              " status; please check the instance")
10388       if not instance.admin_up:
10389         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10390     elif not instance.admin_up:
10391       self.proc.LogWarning("Not shutting down the disk even if the instance is"
10392                            " not supposed to be running because no wait for"
10393                            " sync mode was requested")
10394
10395
10396 class LUInstanceQueryData(NoHooksLU):
10397   """Query runtime instance data.
10398
10399   """
10400   REQ_BGL = False
10401
10402   def ExpandNames(self):
10403     self.needed_locks = {}
10404
10405     # Use locking if requested or when non-static information is wanted
10406     if not (self.op.static or self.op.use_locking):
10407       self.LogWarning("Non-static data requested, locks need to be acquired")
10408       self.op.use_locking = True
10409
10410     if self.op.instances or not self.op.use_locking:
10411       # Expand instance names right here
10412       self.wanted_names = _GetWantedInstances(self, self.op.instances)
10413     else:
10414       # Will use acquired locks
10415       self.wanted_names = None
10416
10417     if self.op.use_locking:
10418       self.share_locks = _ShareAll()
10419
10420       if self.wanted_names is None:
10421         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10422       else:
10423         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10424
10425       self.needed_locks[locking.LEVEL_NODE] = []
10426       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10427
10428   def DeclareLocks(self, level):
10429     if self.op.use_locking and level == locking.LEVEL_NODE:
10430       self._LockInstancesNodes()
10431
10432   def CheckPrereq(self):
10433     """Check prerequisites.
10434
10435     This only checks the optional instance list against the existing names.
10436
10437     """
10438     if self.wanted_names is None:
10439       assert self.op.use_locking, "Locking was not used"
10440       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10441
10442     self.wanted_instances = \
10443         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10444
10445   def _ComputeBlockdevStatus(self, node, instance_name, dev):
10446     """Returns the status of a block device
10447
10448     """
10449     if self.op.static or not node:
10450       return None
10451
10452     self.cfg.SetDiskID(dev, node)
10453
10454     result = self.rpc.call_blockdev_find(node, dev)
10455     if result.offline:
10456       return None
10457
10458     result.Raise("Can't compute disk status for %s" % instance_name)
10459
10460     status = result.payload
10461     if status is None:
10462       return None
10463
10464     return (status.dev_path, status.major, status.minor,
10465             status.sync_percent, status.estimated_time,
10466             status.is_degraded, status.ldisk_status)
10467
10468   def _ComputeDiskStatus(self, instance, snode, dev):
10469     """Compute block device status.
10470
10471     """
10472     if dev.dev_type in constants.LDS_DRBD:
10473       # we change the snode then (otherwise we use the one passed in)
10474       if dev.logical_id[0] == instance.primary_node:
10475         snode = dev.logical_id[1]
10476       else:
10477         snode = dev.logical_id[0]
10478
10479     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10480                                               instance.name, dev)
10481     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10482
10483     if dev.children:
10484       dev_children = map(compat.partial(self._ComputeDiskStatus,
10485                                         instance, snode),
10486                          dev.children)
10487     else:
10488       dev_children = []
10489
10490     return {
10491       "iv_name": dev.iv_name,
10492       "dev_type": dev.dev_type,
10493       "logical_id": dev.logical_id,
10494       "physical_id": dev.physical_id,
10495       "pstatus": dev_pstatus,
10496       "sstatus": dev_sstatus,
10497       "children": dev_children,
10498       "mode": dev.mode,
10499       "size": dev.size,
10500       }
10501
10502   def Exec(self, feedback_fn):
10503     """Gather and return data"""
10504     result = {}
10505
10506     cluster = self.cfg.GetClusterInfo()
10507
10508     pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10509                                           for i in self.wanted_instances)
10510     for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10511       if self.op.static or pnode.offline:
10512         remote_state = None
10513         if pnode.offline:
10514           self.LogWarning("Primary node %s is marked offline, returning static"
10515                           " information only for instance %s" %
10516                           (pnode.name, instance.name))
10517       else:
10518         remote_info = self.rpc.call_instance_info(instance.primary_node,
10519                                                   instance.name,
10520                                                   instance.hypervisor)
10521         remote_info.Raise("Error checking node %s" % instance.primary_node)
10522         remote_info = remote_info.payload
10523         if remote_info and "state" in remote_info:
10524           remote_state = "up"
10525         else:
10526           remote_state = "down"
10527
10528       if instance.admin_up:
10529         config_state = "up"
10530       else:
10531         config_state = "down"
10532
10533       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10534                   instance.disks)
10535
10536       result[instance.name] = {
10537         "name": instance.name,
10538         "config_state": config_state,
10539         "run_state": remote_state,
10540         "pnode": instance.primary_node,
10541         "snodes": instance.secondary_nodes,
10542         "os": instance.os,
10543         # this happens to be the same format used for hooks
10544         "nics": _NICListToTuple(self, instance.nics),
10545         "disk_template": instance.disk_template,
10546         "disks": disks,
10547         "hypervisor": instance.hypervisor,
10548         "network_port": instance.network_port,
10549         "hv_instance": instance.hvparams,
10550         "hv_actual": cluster.FillHV(instance, skip_globals=True),
10551         "be_instance": instance.beparams,
10552         "be_actual": cluster.FillBE(instance),
10553         "os_instance": instance.osparams,
10554         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10555         "serial_no": instance.serial_no,
10556         "mtime": instance.mtime,
10557         "ctime": instance.ctime,
10558         "uuid": instance.uuid,
10559         }
10560
10561     return result
10562
10563
10564 class LUInstanceSetParams(LogicalUnit):
10565   """Modifies an instances's parameters.
10566
10567   """
10568   HPATH = "instance-modify"
10569   HTYPE = constants.HTYPE_INSTANCE
10570   REQ_BGL = False
10571
10572   def CheckArguments(self):
10573     if not (self.op.nics or self.op.disks or self.op.disk_template or
10574             self.op.hvparams or self.op.beparams or self.op.os_name):
10575       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10576
10577     if self.op.hvparams:
10578       _CheckGlobalHvParams(self.op.hvparams)
10579
10580     # Disk validation
10581     disk_addremove = 0
10582     for disk_op, disk_dict in self.op.disks:
10583       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10584       if disk_op == constants.DDM_REMOVE:
10585         disk_addremove += 1
10586         continue
10587       elif disk_op == constants.DDM_ADD:
10588         disk_addremove += 1
10589       else:
10590         if not isinstance(disk_op, int):
10591           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10592         if not isinstance(disk_dict, dict):
10593           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10594           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10595
10596       if disk_op == constants.DDM_ADD:
10597         mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10598         if mode not in constants.DISK_ACCESS_SET:
10599           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10600                                      errors.ECODE_INVAL)
10601         size = disk_dict.get(constants.IDISK_SIZE, None)
10602         if size is None:
10603           raise errors.OpPrereqError("Required disk parameter size missing",
10604                                      errors.ECODE_INVAL)
10605         try:
10606           size = int(size)
10607         except (TypeError, ValueError), err:
10608           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10609                                      str(err), errors.ECODE_INVAL)
10610         disk_dict[constants.IDISK_SIZE] = size
10611       else:
10612         # modification of disk
10613         if constants.IDISK_SIZE in disk_dict:
10614           raise errors.OpPrereqError("Disk size change not possible, use"
10615                                      " grow-disk", errors.ECODE_INVAL)
10616
10617     if disk_addremove > 1:
10618       raise errors.OpPrereqError("Only one disk add or remove operation"
10619                                  " supported at a time", errors.ECODE_INVAL)
10620
10621     if self.op.disks and self.op.disk_template is not None:
10622       raise errors.OpPrereqError("Disk template conversion and other disk"
10623                                  " changes not supported at the same time",
10624                                  errors.ECODE_INVAL)
10625
10626     if (self.op.disk_template and
10627         self.op.disk_template in constants.DTS_INT_MIRROR and
10628         self.op.remote_node is None):
10629       raise errors.OpPrereqError("Changing the disk template to a mirrored"
10630                                  " one requires specifying a secondary node",
10631                                  errors.ECODE_INVAL)
10632
10633     # NIC validation
10634     nic_addremove = 0
10635     for nic_op, nic_dict in self.op.nics:
10636       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10637       if nic_op == constants.DDM_REMOVE:
10638         nic_addremove += 1
10639         continue
10640       elif nic_op == constants.DDM_ADD:
10641         nic_addremove += 1
10642       else:
10643         if not isinstance(nic_op, int):
10644           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10645         if not isinstance(nic_dict, dict):
10646           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10647           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10648
10649       # nic_dict should be a dict
10650       nic_ip = nic_dict.get(constants.INIC_IP, None)
10651       if nic_ip is not None:
10652         if nic_ip.lower() == constants.VALUE_NONE:
10653           nic_dict[constants.INIC_IP] = None
10654         else:
10655           if not netutils.IPAddress.IsValid(nic_ip):
10656             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10657                                        errors.ECODE_INVAL)
10658
10659       nic_bridge = nic_dict.get("bridge", None)
10660       nic_link = nic_dict.get(constants.INIC_LINK, None)
10661       if nic_bridge and nic_link:
10662         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10663                                    " at the same time", errors.ECODE_INVAL)
10664       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10665         nic_dict["bridge"] = None
10666       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10667         nic_dict[constants.INIC_LINK] = None
10668
10669       if nic_op == constants.DDM_ADD:
10670         nic_mac = nic_dict.get(constants.INIC_MAC, None)
10671         if nic_mac is None:
10672           nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10673
10674       if constants.INIC_MAC in nic_dict:
10675         nic_mac = nic_dict[constants.INIC_MAC]
10676         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10677           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10678
10679         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10680           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10681                                      " modifying an existing nic",
10682                                      errors.ECODE_INVAL)
10683
10684     if nic_addremove > 1:
10685       raise errors.OpPrereqError("Only one NIC add or remove operation"
10686                                  " supported at a time", errors.ECODE_INVAL)
10687
10688   def ExpandNames(self):
10689     self._ExpandAndLockInstance()
10690     self.needed_locks[locking.LEVEL_NODE] = []
10691     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10692
10693   def DeclareLocks(self, level):
10694     if level == locking.LEVEL_NODE:
10695       self._LockInstancesNodes()
10696       if self.op.disk_template and self.op.remote_node:
10697         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10698         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10699
10700   def BuildHooksEnv(self):
10701     """Build hooks env.
10702
10703     This runs on the master, primary and secondaries.
10704
10705     """
10706     args = dict()
10707     if constants.BE_MEMORY in self.be_new:
10708       args["memory"] = self.be_new[constants.BE_MEMORY]
10709     if constants.BE_VCPUS in self.be_new:
10710       args["vcpus"] = self.be_new[constants.BE_VCPUS]
10711     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10712     # information at all.
10713     if self.op.nics:
10714       args["nics"] = []
10715       nic_override = dict(self.op.nics)
10716       for idx, nic in enumerate(self.instance.nics):
10717         if idx in nic_override:
10718           this_nic_override = nic_override[idx]
10719         else:
10720           this_nic_override = {}
10721         if constants.INIC_IP in this_nic_override:
10722           ip = this_nic_override[constants.INIC_IP]
10723         else:
10724           ip = nic.ip
10725         if constants.INIC_MAC in this_nic_override:
10726           mac = this_nic_override[constants.INIC_MAC]
10727         else:
10728           mac = nic.mac
10729         if idx in self.nic_pnew:
10730           nicparams = self.nic_pnew[idx]
10731         else:
10732           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10733         mode = nicparams[constants.NIC_MODE]
10734         link = nicparams[constants.NIC_LINK]
10735         args["nics"].append((ip, mac, mode, link))
10736       if constants.DDM_ADD in nic_override:
10737         ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10738         mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10739         nicparams = self.nic_pnew[constants.DDM_ADD]
10740         mode = nicparams[constants.NIC_MODE]
10741         link = nicparams[constants.NIC_LINK]
10742         args["nics"].append((ip, mac, mode, link))
10743       elif constants.DDM_REMOVE in nic_override:
10744         del args["nics"][-1]
10745
10746     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10747     if self.op.disk_template:
10748       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10749
10750     return env
10751
10752   def BuildHooksNodes(self):
10753     """Build hooks nodes.
10754
10755     """
10756     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10757     return (nl, nl)
10758
10759   def CheckPrereq(self):
10760     """Check prerequisites.
10761
10762     This only checks the instance list against the existing names.
10763
10764     """
10765     # checking the new params on the primary/secondary nodes
10766
10767     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10768     cluster = self.cluster = self.cfg.GetClusterInfo()
10769     assert self.instance is not None, \
10770       "Cannot retrieve locked instance %s" % self.op.instance_name
10771     pnode = instance.primary_node
10772     nodelist = list(instance.all_nodes)
10773
10774     # OS change
10775     if self.op.os_name and not self.op.force:
10776       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10777                       self.op.force_variant)
10778       instance_os = self.op.os_name
10779     else:
10780       instance_os = instance.os
10781
10782     if self.op.disk_template:
10783       if instance.disk_template == self.op.disk_template:
10784         raise errors.OpPrereqError("Instance already has disk template %s" %
10785                                    instance.disk_template, errors.ECODE_INVAL)
10786
10787       if (instance.disk_template,
10788           self.op.disk_template) not in self._DISK_CONVERSIONS:
10789         raise errors.OpPrereqError("Unsupported disk template conversion from"
10790                                    " %s to %s" % (instance.disk_template,
10791                                                   self.op.disk_template),
10792                                    errors.ECODE_INVAL)
10793       _CheckInstanceDown(self, instance, "cannot change disk template")
10794       if self.op.disk_template in constants.DTS_INT_MIRROR:
10795         if self.op.remote_node == pnode:
10796           raise errors.OpPrereqError("Given new secondary node %s is the same"
10797                                      " as the primary node of the instance" %
10798                                      self.op.remote_node, errors.ECODE_STATE)
10799         _CheckNodeOnline(self, self.op.remote_node)
10800         _CheckNodeNotDrained(self, self.op.remote_node)
10801         # FIXME: here we assume that the old instance type is DT_PLAIN
10802         assert instance.disk_template == constants.DT_PLAIN
10803         disks = [{constants.IDISK_SIZE: d.size,
10804                   constants.IDISK_VG: d.logical_id[0]}
10805                  for d in instance.disks]
10806         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10807         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10808
10809     # hvparams processing
10810     if self.op.hvparams:
10811       hv_type = instance.hypervisor
10812       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10813       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10814       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10815
10816       # local check
10817       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10818       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10819       self.hv_proposed = self.hv_new = hv_new # the new actual values
10820       self.hv_inst = i_hvdict # the new dict (without defaults)
10821     else:
10822       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
10823                                               instance.hvparams)
10824       self.hv_new = self.hv_inst = {}
10825
10826     # beparams processing
10827     if self.op.beparams:
10828       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10829                                    use_none=True)
10830       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10831       be_new = cluster.SimpleFillBE(i_bedict)
10832       self.be_proposed = self.be_new = be_new # the new actual values
10833       self.be_inst = i_bedict # the new dict (without defaults)
10834     else:
10835       self.be_new = self.be_inst = {}
10836       self.be_proposed = cluster.SimpleFillBE(instance.beparams)
10837     be_old = cluster.FillBE(instance)
10838
10839     # CPU param validation -- checking every time a paramtere is
10840     # changed to cover all cases where either CPU mask or vcpus have
10841     # changed
10842     if (constants.BE_VCPUS in self.be_proposed and
10843         constants.HV_CPU_MASK in self.hv_proposed):
10844       cpu_list = \
10845         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
10846       # Verify mask is consistent with number of vCPUs. Can skip this
10847       # test if only 1 entry in the CPU mask, which means same mask
10848       # is applied to all vCPUs.
10849       if (len(cpu_list) > 1 and
10850           len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
10851         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
10852                                    " CPU mask [%s]" %
10853                                    (self.be_proposed[constants.BE_VCPUS],
10854                                     self.hv_proposed[constants.HV_CPU_MASK]),
10855                                    errors.ECODE_INVAL)
10856
10857       # Only perform this test if a new CPU mask is given
10858       if constants.HV_CPU_MASK in self.hv_new:
10859         # Calculate the largest CPU number requested
10860         max_requested_cpu = max(map(max, cpu_list))
10861         # Check that all of the instance's nodes have enough physical CPUs to
10862         # satisfy the requested CPU mask
10863         _CheckNodesPhysicalCPUs(self, instance.all_nodes,
10864                                 max_requested_cpu + 1, instance.hypervisor)
10865
10866     # osparams processing
10867     if self.op.osparams:
10868       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10869       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10870       self.os_inst = i_osdict # the new dict (without defaults)
10871     else:
10872       self.os_inst = {}
10873
10874     self.warn = []
10875
10876     if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10877         be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10878       mem_check_list = [pnode]
10879       if be_new[constants.BE_AUTO_BALANCE]:
10880         # either we changed auto_balance to yes or it was from before
10881         mem_check_list.extend(instance.secondary_nodes)
10882       instance_info = self.rpc.call_instance_info(pnode, instance.name,
10883                                                   instance.hypervisor)
10884       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10885                                          instance.hypervisor)
10886       pninfo = nodeinfo[pnode]
10887       msg = pninfo.fail_msg
10888       if msg:
10889         # Assume the primary node is unreachable and go ahead
10890         self.warn.append("Can't get info from primary node %s: %s" %
10891                          (pnode, msg))
10892       elif not isinstance(pninfo.payload.get("memory_free", None), int):
10893         self.warn.append("Node data from primary node %s doesn't contain"
10894                          " free memory information" % pnode)
10895       elif instance_info.fail_msg:
10896         self.warn.append("Can't get instance runtime information: %s" %
10897                         instance_info.fail_msg)
10898       else:
10899         if instance_info.payload:
10900           current_mem = int(instance_info.payload["memory"])
10901         else:
10902           # Assume instance not running
10903           # (there is a slight race condition here, but it's not very probable,
10904           # and we have no other way to check)
10905           current_mem = 0
10906         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10907                     pninfo.payload["memory_free"])
10908         if miss_mem > 0:
10909           raise errors.OpPrereqError("This change will prevent the instance"
10910                                      " from starting, due to %d MB of memory"
10911                                      " missing on its primary node" % miss_mem,
10912                                      errors.ECODE_NORES)
10913
10914       if be_new[constants.BE_AUTO_BALANCE]:
10915         for node, nres in nodeinfo.items():
10916           if node not in instance.secondary_nodes:
10917             continue
10918           nres.Raise("Can't get info from secondary node %s" % node,
10919                      prereq=True, ecode=errors.ECODE_STATE)
10920           if not isinstance(nres.payload.get("memory_free", None), int):
10921             raise errors.OpPrereqError("Secondary node %s didn't return free"
10922                                        " memory information" % node,
10923                                        errors.ECODE_STATE)
10924           elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10925             raise errors.OpPrereqError("This change will prevent the instance"
10926                                        " from failover to its secondary node"
10927                                        " %s, due to not enough memory" % node,
10928                                        errors.ECODE_STATE)
10929
10930     # NIC processing
10931     self.nic_pnew = {}
10932     self.nic_pinst = {}
10933     for nic_op, nic_dict in self.op.nics:
10934       if nic_op == constants.DDM_REMOVE:
10935         if not instance.nics:
10936           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10937                                      errors.ECODE_INVAL)
10938         continue
10939       if nic_op != constants.DDM_ADD:
10940         # an existing nic
10941         if not instance.nics:
10942           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10943                                      " no NICs" % nic_op,
10944                                      errors.ECODE_INVAL)
10945         if nic_op < 0 or nic_op >= len(instance.nics):
10946           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10947                                      " are 0 to %d" %
10948                                      (nic_op, len(instance.nics) - 1),
10949                                      errors.ECODE_INVAL)
10950         old_nic_params = instance.nics[nic_op].nicparams
10951         old_nic_ip = instance.nics[nic_op].ip
10952       else:
10953         old_nic_params = {}
10954         old_nic_ip = None
10955
10956       update_params_dict = dict([(key, nic_dict[key])
10957                                  for key in constants.NICS_PARAMETERS
10958                                  if key in nic_dict])
10959
10960       if "bridge" in nic_dict:
10961         update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10962
10963       new_nic_params = _GetUpdatedParams(old_nic_params,
10964                                          update_params_dict)
10965       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10966       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10967       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10968       self.nic_pinst[nic_op] = new_nic_params
10969       self.nic_pnew[nic_op] = new_filled_nic_params
10970       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10971
10972       if new_nic_mode == constants.NIC_MODE_BRIDGED:
10973         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10974         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10975         if msg:
10976           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10977           if self.op.force:
10978             self.warn.append(msg)
10979           else:
10980             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10981       if new_nic_mode == constants.NIC_MODE_ROUTED:
10982         if constants.INIC_IP in nic_dict:
10983           nic_ip = nic_dict[constants.INIC_IP]
10984         else:
10985           nic_ip = old_nic_ip
10986         if nic_ip is None:
10987           raise errors.OpPrereqError("Cannot set the nic ip to None"
10988                                      " on a routed nic", errors.ECODE_INVAL)
10989       if constants.INIC_MAC in nic_dict:
10990         nic_mac = nic_dict[constants.INIC_MAC]
10991         if nic_mac is None:
10992           raise errors.OpPrereqError("Cannot set the nic mac to None",
10993                                      errors.ECODE_INVAL)
10994         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10995           # otherwise generate the mac
10996           nic_dict[constants.INIC_MAC] = \
10997             self.cfg.GenerateMAC(self.proc.GetECId())
10998         else:
10999           # or validate/reserve the current one
11000           try:
11001             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11002           except errors.ReservationError:
11003             raise errors.OpPrereqError("MAC address %s already in use"
11004                                        " in cluster" % nic_mac,
11005                                        errors.ECODE_NOTUNIQUE)
11006
11007     # DISK processing
11008     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11009       raise errors.OpPrereqError("Disk operations not supported for"
11010                                  " diskless instances",
11011                                  errors.ECODE_INVAL)
11012     for disk_op, _ in self.op.disks:
11013       if disk_op == constants.DDM_REMOVE:
11014         if len(instance.disks) == 1:
11015           raise errors.OpPrereqError("Cannot remove the last disk of"
11016                                      " an instance", errors.ECODE_INVAL)
11017         _CheckInstanceDown(self, instance, "cannot remove disks")
11018
11019       if (disk_op == constants.DDM_ADD and
11020           len(instance.disks) >= constants.MAX_DISKS):
11021         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11022                                    " add more" % constants.MAX_DISKS,
11023                                    errors.ECODE_STATE)
11024       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11025         # an existing disk
11026         if disk_op < 0 or disk_op >= len(instance.disks):
11027           raise errors.OpPrereqError("Invalid disk index %s, valid values"
11028                                      " are 0 to %d" %
11029                                      (disk_op, len(instance.disks)),
11030                                      errors.ECODE_INVAL)
11031
11032     return
11033
11034   def _ConvertPlainToDrbd(self, feedback_fn):
11035     """Converts an instance from plain to drbd.
11036
11037     """
11038     feedback_fn("Converting template to drbd")
11039     instance = self.instance
11040     pnode = instance.primary_node
11041     snode = self.op.remote_node
11042
11043     # create a fake disk info for _GenerateDiskTemplate
11044     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11045                   constants.IDISK_VG: d.logical_id[0]}
11046                  for d in instance.disks]
11047     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11048                                       instance.name, pnode, [snode],
11049                                       disk_info, None, None, 0, feedback_fn)
11050     info = _GetInstanceInfoText(instance)
11051     feedback_fn("Creating aditional volumes...")
11052     # first, create the missing data and meta devices
11053     for disk in new_disks:
11054       # unfortunately this is... not too nice
11055       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11056                             info, True)
11057       for child in disk.children:
11058         _CreateSingleBlockDev(self, snode, instance, child, info, True)
11059     # at this stage, all new LVs have been created, we can rename the
11060     # old ones
11061     feedback_fn("Renaming original volumes...")
11062     rename_list = [(o, n.children[0].logical_id)
11063                    for (o, n) in zip(instance.disks, new_disks)]
11064     result = self.rpc.call_blockdev_rename(pnode, rename_list)
11065     result.Raise("Failed to rename original LVs")
11066
11067     feedback_fn("Initializing DRBD devices...")
11068     # all child devices are in place, we can now create the DRBD devices
11069     for disk in new_disks:
11070       for node in [pnode, snode]:
11071         f_create = node == pnode
11072         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11073
11074     # at this point, the instance has been modified
11075     instance.disk_template = constants.DT_DRBD8
11076     instance.disks = new_disks
11077     self.cfg.Update(instance, feedback_fn)
11078
11079     # disks are created, waiting for sync
11080     disk_abort = not _WaitForSync(self, instance,
11081                                   oneshot=not self.op.wait_for_sync)
11082     if disk_abort:
11083       raise errors.OpExecError("There are some degraded disks for"
11084                                " this instance, please cleanup manually")
11085
11086   def _ConvertDrbdToPlain(self, feedback_fn):
11087     """Converts an instance from drbd to plain.
11088
11089     """
11090     instance = self.instance
11091     assert len(instance.secondary_nodes) == 1
11092     pnode = instance.primary_node
11093     snode = instance.secondary_nodes[0]
11094     feedback_fn("Converting template to plain")
11095
11096     old_disks = instance.disks
11097     new_disks = [d.children[0] for d in old_disks]
11098
11099     # copy over size and mode
11100     for parent, child in zip(old_disks, new_disks):
11101       child.size = parent.size
11102       child.mode = parent.mode
11103
11104     # update instance structure
11105     instance.disks = new_disks
11106     instance.disk_template = constants.DT_PLAIN
11107     self.cfg.Update(instance, feedback_fn)
11108
11109     feedback_fn("Removing volumes on the secondary node...")
11110     for disk in old_disks:
11111       self.cfg.SetDiskID(disk, snode)
11112       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11113       if msg:
11114         self.LogWarning("Could not remove block device %s on node %s,"
11115                         " continuing anyway: %s", disk.iv_name, snode, msg)
11116
11117     feedback_fn("Removing unneeded volumes on the primary node...")
11118     for idx, disk in enumerate(old_disks):
11119       meta = disk.children[1]
11120       self.cfg.SetDiskID(meta, pnode)
11121       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11122       if msg:
11123         self.LogWarning("Could not remove metadata for disk %d on node %s,"
11124                         " continuing anyway: %s", idx, pnode, msg)
11125
11126   def Exec(self, feedback_fn):
11127     """Modifies an instance.
11128
11129     All parameters take effect only at the next restart of the instance.
11130
11131     """
11132     # Process here the warnings from CheckPrereq, as we don't have a
11133     # feedback_fn there.
11134     for warn in self.warn:
11135       feedback_fn("WARNING: %s" % warn)
11136
11137     result = []
11138     instance = self.instance
11139     # disk changes
11140     for disk_op, disk_dict in self.op.disks:
11141       if disk_op == constants.DDM_REMOVE:
11142         # remove the last disk
11143         device = instance.disks.pop()
11144         device_idx = len(instance.disks)
11145         for node, disk in device.ComputeNodeTree(instance.primary_node):
11146           self.cfg.SetDiskID(disk, node)
11147           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11148           if msg:
11149             self.LogWarning("Could not remove disk/%d on node %s: %s,"
11150                             " continuing anyway", device_idx, node, msg)
11151         result.append(("disk/%d" % device_idx, "remove"))
11152       elif disk_op == constants.DDM_ADD:
11153         # add a new disk
11154         if instance.disk_template in (constants.DT_FILE,
11155                                         constants.DT_SHARED_FILE):
11156           file_driver, file_path = instance.disks[0].logical_id
11157           file_path = os.path.dirname(file_path)
11158         else:
11159           file_driver = file_path = None
11160         disk_idx_base = len(instance.disks)
11161         new_disk = _GenerateDiskTemplate(self,
11162                                          instance.disk_template,
11163                                          instance.name, instance.primary_node,
11164                                          instance.secondary_nodes,
11165                                          [disk_dict],
11166                                          file_path,
11167                                          file_driver,
11168                                          disk_idx_base, feedback_fn)[0]
11169         instance.disks.append(new_disk)
11170         info = _GetInstanceInfoText(instance)
11171
11172         logging.info("Creating volume %s for instance %s",
11173                      new_disk.iv_name, instance.name)
11174         # Note: this needs to be kept in sync with _CreateDisks
11175         #HARDCODE
11176         for node in instance.all_nodes:
11177           f_create = node == instance.primary_node
11178           try:
11179             _CreateBlockDev(self, node, instance, new_disk,
11180                             f_create, info, f_create)
11181           except errors.OpExecError, err:
11182             self.LogWarning("Failed to create volume %s (%s) on"
11183                             " node %s: %s",
11184                             new_disk.iv_name, new_disk, node, err)
11185         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11186                        (new_disk.size, new_disk.mode)))
11187       else:
11188         # change a given disk
11189         instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11190         result.append(("disk.mode/%d" % disk_op,
11191                        disk_dict[constants.IDISK_MODE]))
11192
11193     if self.op.disk_template:
11194       r_shut = _ShutdownInstanceDisks(self, instance)
11195       if not r_shut:
11196         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11197                                  " proceed with disk template conversion")
11198       mode = (instance.disk_template, self.op.disk_template)
11199       try:
11200         self._DISK_CONVERSIONS[mode](self, feedback_fn)
11201       except:
11202         self.cfg.ReleaseDRBDMinors(instance.name)
11203         raise
11204       result.append(("disk_template", self.op.disk_template))
11205
11206     # NIC changes
11207     for nic_op, nic_dict in self.op.nics:
11208       if nic_op == constants.DDM_REMOVE:
11209         # remove the last nic
11210         del instance.nics[-1]
11211         result.append(("nic.%d" % len(instance.nics), "remove"))
11212       elif nic_op == constants.DDM_ADD:
11213         # mac and bridge should be set, by now
11214         mac = nic_dict[constants.INIC_MAC]
11215         ip = nic_dict.get(constants.INIC_IP, None)
11216         nicparams = self.nic_pinst[constants.DDM_ADD]
11217         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11218         instance.nics.append(new_nic)
11219         result.append(("nic.%d" % (len(instance.nics) - 1),
11220                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
11221                        (new_nic.mac, new_nic.ip,
11222                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11223                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11224                        )))
11225       else:
11226         for key in (constants.INIC_MAC, constants.INIC_IP):
11227           if key in nic_dict:
11228             setattr(instance.nics[nic_op], key, nic_dict[key])
11229         if nic_op in self.nic_pinst:
11230           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11231         for key, val in nic_dict.iteritems():
11232           result.append(("nic.%s/%d" % (key, nic_op), val))
11233
11234     # hvparams changes
11235     if self.op.hvparams:
11236       instance.hvparams = self.hv_inst
11237       for key, val in self.op.hvparams.iteritems():
11238         result.append(("hv/%s" % key, val))
11239
11240     # beparams changes
11241     if self.op.beparams:
11242       instance.beparams = self.be_inst
11243       for key, val in self.op.beparams.iteritems():
11244         result.append(("be/%s" % key, val))
11245
11246     # OS change
11247     if self.op.os_name:
11248       instance.os = self.op.os_name
11249
11250     # osparams changes
11251     if self.op.osparams:
11252       instance.osparams = self.os_inst
11253       for key, val in self.op.osparams.iteritems():
11254         result.append(("os/%s" % key, val))
11255
11256     self.cfg.Update(instance, feedback_fn)
11257
11258     return result
11259
11260   _DISK_CONVERSIONS = {
11261     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11262     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11263     }
11264
11265
11266 class LUInstanceChangeGroup(LogicalUnit):
11267   HPATH = "instance-change-group"
11268   HTYPE = constants.HTYPE_INSTANCE
11269   REQ_BGL = False
11270
11271   def ExpandNames(self):
11272     self.share_locks = _ShareAll()
11273     self.needed_locks = {
11274       locking.LEVEL_NODEGROUP: [],
11275       locking.LEVEL_NODE: [],
11276       }
11277
11278     self._ExpandAndLockInstance()
11279
11280     if self.op.target_groups:
11281       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11282                                   self.op.target_groups)
11283     else:
11284       self.req_target_uuids = None
11285
11286     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11287
11288   def DeclareLocks(self, level):
11289     if level == locking.LEVEL_NODEGROUP:
11290       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11291
11292       if self.req_target_uuids:
11293         lock_groups = set(self.req_target_uuids)
11294
11295         # Lock all groups used by instance optimistically; this requires going
11296         # via the node before it's locked, requiring verification later on
11297         instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11298         lock_groups.update(instance_groups)
11299       else:
11300         # No target groups, need to lock all of them
11301         lock_groups = locking.ALL_SET
11302
11303       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11304
11305     elif level == locking.LEVEL_NODE:
11306       if self.req_target_uuids:
11307         # Lock all nodes used by instances
11308         self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11309         self._LockInstancesNodes()
11310
11311         # Lock all nodes in all potential target groups
11312         lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11313                        self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11314         member_nodes = [node_name
11315                         for group in lock_groups
11316                         for node_name in self.cfg.GetNodeGroup(group).members]
11317         self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11318       else:
11319         # Lock all nodes as all groups are potential targets
11320         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11321
11322   def CheckPrereq(self):
11323     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11324     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11325     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11326
11327     assert (self.req_target_uuids is None or
11328             owned_groups.issuperset(self.req_target_uuids))
11329     assert owned_instances == set([self.op.instance_name])
11330
11331     # Get instance information
11332     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11333
11334     # Check if node groups for locked instance are still correct
11335     assert owned_nodes.issuperset(self.instance.all_nodes), \
11336       ("Instance %s's nodes changed while we kept the lock" %
11337        self.op.instance_name)
11338
11339     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11340                                            owned_groups)
11341
11342     if self.req_target_uuids:
11343       # User requested specific target groups
11344       self.target_uuids = self.req_target_uuids
11345     else:
11346       # All groups except those used by the instance are potential targets
11347       self.target_uuids = owned_groups - inst_groups
11348
11349     conflicting_groups = self.target_uuids & inst_groups
11350     if conflicting_groups:
11351       raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11352                                  " used by the instance '%s'" %
11353                                  (utils.CommaJoin(conflicting_groups),
11354                                   self.op.instance_name),
11355                                  errors.ECODE_INVAL)
11356
11357     if not self.target_uuids:
11358       raise errors.OpPrereqError("There are no possible target groups",
11359                                  errors.ECODE_INVAL)
11360
11361   def BuildHooksEnv(self):
11362     """Build hooks env.
11363
11364     """
11365     assert self.target_uuids
11366
11367     env = {
11368       "TARGET_GROUPS": " ".join(self.target_uuids),
11369       }
11370
11371     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11372
11373     return env
11374
11375   def BuildHooksNodes(self):
11376     """Build hooks nodes.
11377
11378     """
11379     mn = self.cfg.GetMasterNode()
11380     return ([mn], [mn])
11381
11382   def Exec(self, feedback_fn):
11383     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11384
11385     assert instances == [self.op.instance_name], "Instance not locked"
11386
11387     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11388                      instances=instances, target_groups=list(self.target_uuids))
11389
11390     ial.Run(self.op.iallocator)
11391
11392     if not ial.success:
11393       raise errors.OpPrereqError("Can't compute solution for changing group of"
11394                                  " instance '%s' using iallocator '%s': %s" %
11395                                  (self.op.instance_name, self.op.iallocator,
11396                                   ial.info),
11397                                  errors.ECODE_NORES)
11398
11399     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11400
11401     self.LogInfo("Iallocator returned %s job(s) for changing group of"
11402                  " instance '%s'", len(jobs), self.op.instance_name)
11403
11404     return ResultWithJobs(jobs)
11405
11406
11407 class LUBackupQuery(NoHooksLU):
11408   """Query the exports list
11409
11410   """
11411   REQ_BGL = False
11412
11413   def ExpandNames(self):
11414     self.needed_locks = {}
11415     self.share_locks[locking.LEVEL_NODE] = 1
11416     if not self.op.nodes:
11417       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11418     else:
11419       self.needed_locks[locking.LEVEL_NODE] = \
11420         _GetWantedNodes(self, self.op.nodes)
11421
11422   def Exec(self, feedback_fn):
11423     """Compute the list of all the exported system images.
11424
11425     @rtype: dict
11426     @return: a dictionary with the structure node->(export-list)
11427         where export-list is a list of the instances exported on
11428         that node.
11429
11430     """
11431     self.nodes = self.owned_locks(locking.LEVEL_NODE)
11432     rpcresult = self.rpc.call_export_list(self.nodes)
11433     result = {}
11434     for node in rpcresult:
11435       if rpcresult[node].fail_msg:
11436         result[node] = False
11437       else:
11438         result[node] = rpcresult[node].payload
11439
11440     return result
11441
11442
11443 class LUBackupPrepare(NoHooksLU):
11444   """Prepares an instance for an export and returns useful information.
11445
11446   """
11447   REQ_BGL = False
11448
11449   def ExpandNames(self):
11450     self._ExpandAndLockInstance()
11451
11452   def CheckPrereq(self):
11453     """Check prerequisites.
11454
11455     """
11456     instance_name = self.op.instance_name
11457
11458     self.instance = self.cfg.GetInstanceInfo(instance_name)
11459     assert self.instance is not None, \
11460           "Cannot retrieve locked instance %s" % self.op.instance_name
11461     _CheckNodeOnline(self, self.instance.primary_node)
11462
11463     self._cds = _GetClusterDomainSecret()
11464
11465   def Exec(self, feedback_fn):
11466     """Prepares an instance for an export.
11467
11468     """
11469     instance = self.instance
11470
11471     if self.op.mode == constants.EXPORT_MODE_REMOTE:
11472       salt = utils.GenerateSecret(8)
11473
11474       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11475       result = self.rpc.call_x509_cert_create(instance.primary_node,
11476                                               constants.RIE_CERT_VALIDITY)
11477       result.Raise("Can't create X509 key and certificate on %s" % result.node)
11478
11479       (name, cert_pem) = result.payload
11480
11481       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11482                                              cert_pem)
11483
11484       return {
11485         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11486         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11487                           salt),
11488         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11489         }
11490
11491     return None
11492
11493
11494 class LUBackupExport(LogicalUnit):
11495   """Export an instance to an image in the cluster.
11496
11497   """
11498   HPATH = "instance-export"
11499   HTYPE = constants.HTYPE_INSTANCE
11500   REQ_BGL = False
11501
11502   def CheckArguments(self):
11503     """Check the arguments.
11504
11505     """
11506     self.x509_key_name = self.op.x509_key_name
11507     self.dest_x509_ca_pem = self.op.destination_x509_ca
11508
11509     if self.op.mode == constants.EXPORT_MODE_REMOTE:
11510       if not self.x509_key_name:
11511         raise errors.OpPrereqError("Missing X509 key name for encryption",
11512                                    errors.ECODE_INVAL)
11513
11514       if not self.dest_x509_ca_pem:
11515         raise errors.OpPrereqError("Missing destination X509 CA",
11516                                    errors.ECODE_INVAL)
11517
11518   def ExpandNames(self):
11519     self._ExpandAndLockInstance()
11520
11521     # Lock all nodes for local exports
11522     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11523       # FIXME: lock only instance primary and destination node
11524       #
11525       # Sad but true, for now we have do lock all nodes, as we don't know where
11526       # the previous export might be, and in this LU we search for it and
11527       # remove it from its current node. In the future we could fix this by:
11528       #  - making a tasklet to search (share-lock all), then create the
11529       #    new one, then one to remove, after
11530       #  - removing the removal operation altogether
11531       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11532
11533   def DeclareLocks(self, level):
11534     """Last minute lock declaration."""
11535     # All nodes are locked anyway, so nothing to do here.
11536
11537   def BuildHooksEnv(self):
11538     """Build hooks env.
11539
11540     This will run on the master, primary node and target node.
11541
11542     """
11543     env = {
11544       "EXPORT_MODE": self.op.mode,
11545       "EXPORT_NODE": self.op.target_node,
11546       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11547       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11548       # TODO: Generic function for boolean env variables
11549       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11550       }
11551
11552     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11553
11554     return env
11555
11556   def BuildHooksNodes(self):
11557     """Build hooks nodes.
11558
11559     """
11560     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11561
11562     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11563       nl.append(self.op.target_node)
11564
11565     return (nl, nl)
11566
11567   def CheckPrereq(self):
11568     """Check prerequisites.
11569
11570     This checks that the instance and node names are valid.
11571
11572     """
11573     instance_name = self.op.instance_name
11574
11575     self.instance = self.cfg.GetInstanceInfo(instance_name)
11576     assert self.instance is not None, \
11577           "Cannot retrieve locked instance %s" % self.op.instance_name
11578     _CheckNodeOnline(self, self.instance.primary_node)
11579
11580     if (self.op.remove_instance and self.instance.admin_up and
11581         not self.op.shutdown):
11582       raise errors.OpPrereqError("Can not remove instance without shutting it"
11583                                  " down before")
11584
11585     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11586       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11587       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11588       assert self.dst_node is not None
11589
11590       _CheckNodeOnline(self, self.dst_node.name)
11591       _CheckNodeNotDrained(self, self.dst_node.name)
11592
11593       self._cds = None
11594       self.dest_disk_info = None
11595       self.dest_x509_ca = None
11596
11597     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11598       self.dst_node = None
11599
11600       if len(self.op.target_node) != len(self.instance.disks):
11601         raise errors.OpPrereqError(("Received destination information for %s"
11602                                     " disks, but instance %s has %s disks") %
11603                                    (len(self.op.target_node), instance_name,
11604                                     len(self.instance.disks)),
11605                                    errors.ECODE_INVAL)
11606
11607       cds = _GetClusterDomainSecret()
11608
11609       # Check X509 key name
11610       try:
11611         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11612       except (TypeError, ValueError), err:
11613         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11614
11615       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11616         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11617                                    errors.ECODE_INVAL)
11618
11619       # Load and verify CA
11620       try:
11621         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11622       except OpenSSL.crypto.Error, err:
11623         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11624                                    (err, ), errors.ECODE_INVAL)
11625
11626       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11627       if errcode is not None:
11628         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11629                                    (msg, ), errors.ECODE_INVAL)
11630
11631       self.dest_x509_ca = cert
11632
11633       # Verify target information
11634       disk_info = []
11635       for idx, disk_data in enumerate(self.op.target_node):
11636         try:
11637           (host, port, magic) = \
11638             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11639         except errors.GenericError, err:
11640           raise errors.OpPrereqError("Target info for disk %s: %s" %
11641                                      (idx, err), errors.ECODE_INVAL)
11642
11643         disk_info.append((host, port, magic))
11644
11645       assert len(disk_info) == len(self.op.target_node)
11646       self.dest_disk_info = disk_info
11647
11648     else:
11649       raise errors.ProgrammerError("Unhandled export mode %r" %
11650                                    self.op.mode)
11651
11652     # instance disk type verification
11653     # TODO: Implement export support for file-based disks
11654     for disk in self.instance.disks:
11655       if disk.dev_type == constants.LD_FILE:
11656         raise errors.OpPrereqError("Export not supported for instances with"
11657                                    " file-based disks", errors.ECODE_INVAL)
11658
11659   def _CleanupExports(self, feedback_fn):
11660     """Removes exports of current instance from all other nodes.
11661
11662     If an instance in a cluster with nodes A..D was exported to node C, its
11663     exports will be removed from the nodes A, B and D.
11664
11665     """
11666     assert self.op.mode != constants.EXPORT_MODE_REMOTE
11667
11668     nodelist = self.cfg.GetNodeList()
11669     nodelist.remove(self.dst_node.name)
11670
11671     # on one-node clusters nodelist will be empty after the removal
11672     # if we proceed the backup would be removed because OpBackupQuery
11673     # substitutes an empty list with the full cluster node list.
11674     iname = self.instance.name
11675     if nodelist:
11676       feedback_fn("Removing old exports for instance %s" % iname)
11677       exportlist = self.rpc.call_export_list(nodelist)
11678       for node in exportlist:
11679         if exportlist[node].fail_msg:
11680           continue
11681         if iname in exportlist[node].payload:
11682           msg = self.rpc.call_export_remove(node, iname).fail_msg
11683           if msg:
11684             self.LogWarning("Could not remove older export for instance %s"
11685                             " on node %s: %s", iname, node, msg)
11686
11687   def Exec(self, feedback_fn):
11688     """Export an instance to an image in the cluster.
11689
11690     """
11691     assert self.op.mode in constants.EXPORT_MODES
11692
11693     instance = self.instance
11694     src_node = instance.primary_node
11695
11696     if self.op.shutdown:
11697       # shutdown the instance, but not the disks
11698       feedback_fn("Shutting down instance %s" % instance.name)
11699       result = self.rpc.call_instance_shutdown(src_node, instance,
11700                                                self.op.shutdown_timeout)
11701       # TODO: Maybe ignore failures if ignore_remove_failures is set
11702       result.Raise("Could not shutdown instance %s on"
11703                    " node %s" % (instance.name, src_node))
11704
11705     # set the disks ID correctly since call_instance_start needs the
11706     # correct drbd minor to create the symlinks
11707     for disk in instance.disks:
11708       self.cfg.SetDiskID(disk, src_node)
11709
11710     activate_disks = (not instance.admin_up)
11711
11712     if activate_disks:
11713       # Activate the instance disks if we'exporting a stopped instance
11714       feedback_fn("Activating disks for %s" % instance.name)
11715       _StartInstanceDisks(self, instance, None)
11716
11717     try:
11718       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11719                                                      instance)
11720
11721       helper.CreateSnapshots()
11722       try:
11723         if (self.op.shutdown and instance.admin_up and
11724             not self.op.remove_instance):
11725           assert not activate_disks
11726           feedback_fn("Starting instance %s" % instance.name)
11727           result = self.rpc.call_instance_start(src_node, instance,
11728                                                 None, None, False)
11729           msg = result.fail_msg
11730           if msg:
11731             feedback_fn("Failed to start instance: %s" % msg)
11732             _ShutdownInstanceDisks(self, instance)
11733             raise errors.OpExecError("Could not start instance: %s" % msg)
11734
11735         if self.op.mode == constants.EXPORT_MODE_LOCAL:
11736           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11737         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11738           connect_timeout = constants.RIE_CONNECT_TIMEOUT
11739           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11740
11741           (key_name, _, _) = self.x509_key_name
11742
11743           dest_ca_pem = \
11744             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11745                                             self.dest_x509_ca)
11746
11747           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11748                                                      key_name, dest_ca_pem,
11749                                                      timeouts)
11750       finally:
11751         helper.Cleanup()
11752
11753       # Check for backwards compatibility
11754       assert len(dresults) == len(instance.disks)
11755       assert compat.all(isinstance(i, bool) for i in dresults), \
11756              "Not all results are boolean: %r" % dresults
11757
11758     finally:
11759       if activate_disks:
11760         feedback_fn("Deactivating disks for %s" % instance.name)
11761         _ShutdownInstanceDisks(self, instance)
11762
11763     if not (compat.all(dresults) and fin_resu):
11764       failures = []
11765       if not fin_resu:
11766         failures.append("export finalization")
11767       if not compat.all(dresults):
11768         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11769                                if not dsk)
11770         failures.append("disk export: disk(s) %s" % fdsk)
11771
11772       raise errors.OpExecError("Export failed, errors in %s" %
11773                                utils.CommaJoin(failures))
11774
11775     # At this point, the export was successful, we can cleanup/finish
11776
11777     # Remove instance if requested
11778     if self.op.remove_instance:
11779       feedback_fn("Removing instance %s" % instance.name)
11780       _RemoveInstance(self, feedback_fn, instance,
11781                       self.op.ignore_remove_failures)
11782
11783     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11784       self._CleanupExports(feedback_fn)
11785
11786     return fin_resu, dresults
11787
11788
11789 class LUBackupRemove(NoHooksLU):
11790   """Remove exports related to the named instance.
11791
11792   """
11793   REQ_BGL = False
11794
11795   def ExpandNames(self):
11796     self.needed_locks = {}
11797     # We need all nodes to be locked in order for RemoveExport to work, but we
11798     # don't need to lock the instance itself, as nothing will happen to it (and
11799     # we can remove exports also for a removed instance)
11800     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11801
11802   def Exec(self, feedback_fn):
11803     """Remove any export.
11804
11805     """
11806     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11807     # If the instance was not found we'll try with the name that was passed in.
11808     # This will only work if it was an FQDN, though.
11809     fqdn_warn = False
11810     if not instance_name:
11811       fqdn_warn = True
11812       instance_name = self.op.instance_name
11813
11814     locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11815     exportlist = self.rpc.call_export_list(locked_nodes)
11816     found = False
11817     for node in exportlist:
11818       msg = exportlist[node].fail_msg
11819       if msg:
11820         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11821         continue
11822       if instance_name in exportlist[node].payload:
11823         found = True
11824         result = self.rpc.call_export_remove(node, instance_name)
11825         msg = result.fail_msg
11826         if msg:
11827           logging.error("Could not remove export for instance %s"
11828                         " on node %s: %s", instance_name, node, msg)
11829
11830     if fqdn_warn and not found:
11831       feedback_fn("Export not found. If trying to remove an export belonging"
11832                   " to a deleted instance please use its Fully Qualified"
11833                   " Domain Name.")
11834
11835
11836 class LUGroupAdd(LogicalUnit):
11837   """Logical unit for creating node groups.
11838
11839   """
11840   HPATH = "group-add"
11841   HTYPE = constants.HTYPE_GROUP
11842   REQ_BGL = False
11843
11844   def ExpandNames(self):
11845     # We need the new group's UUID here so that we can create and acquire the
11846     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11847     # that it should not check whether the UUID exists in the configuration.
11848     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11849     self.needed_locks = {}
11850     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11851
11852   def CheckPrereq(self):
11853     """Check prerequisites.
11854
11855     This checks that the given group name is not an existing node group
11856     already.
11857
11858     """
11859     try:
11860       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11861     except errors.OpPrereqError:
11862       pass
11863     else:
11864       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11865                                  " node group (UUID: %s)" %
11866                                  (self.op.group_name, existing_uuid),
11867                                  errors.ECODE_EXISTS)
11868
11869     if self.op.ndparams:
11870       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11871
11872   def BuildHooksEnv(self):
11873     """Build hooks env.
11874
11875     """
11876     return {
11877       "GROUP_NAME": self.op.group_name,
11878       }
11879
11880   def BuildHooksNodes(self):
11881     """Build hooks nodes.
11882
11883     """
11884     mn = self.cfg.GetMasterNode()
11885     return ([mn], [mn])
11886
11887   def Exec(self, feedback_fn):
11888     """Add the node group to the cluster.
11889
11890     """
11891     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11892                                   uuid=self.group_uuid,
11893                                   alloc_policy=self.op.alloc_policy,
11894                                   ndparams=self.op.ndparams)
11895
11896     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11897     del self.remove_locks[locking.LEVEL_NODEGROUP]
11898
11899
11900 class LUGroupAssignNodes(NoHooksLU):
11901   """Logical unit for assigning nodes to groups.
11902
11903   """
11904   REQ_BGL = False
11905
11906   def ExpandNames(self):
11907     # These raise errors.OpPrereqError on their own:
11908     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11909     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11910
11911     # We want to lock all the affected nodes and groups. We have readily
11912     # available the list of nodes, and the *destination* group. To gather the
11913     # list of "source" groups, we need to fetch node information later on.
11914     self.needed_locks = {
11915       locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11916       locking.LEVEL_NODE: self.op.nodes,
11917       }
11918
11919   def DeclareLocks(self, level):
11920     if level == locking.LEVEL_NODEGROUP:
11921       assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11922
11923       # Try to get all affected nodes' groups without having the group or node
11924       # lock yet. Needs verification later in the code flow.
11925       groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11926
11927       self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11928
11929   def CheckPrereq(self):
11930     """Check prerequisites.
11931
11932     """
11933     assert self.needed_locks[locking.LEVEL_NODEGROUP]
11934     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
11935             frozenset(self.op.nodes))
11936
11937     expected_locks = (set([self.group_uuid]) |
11938                       self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11939     actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
11940     if actual_locks != expected_locks:
11941       raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11942                                " current groups are '%s', used to be '%s'" %
11943                                (utils.CommaJoin(expected_locks),
11944                                 utils.CommaJoin(actual_locks)))
11945
11946     self.node_data = self.cfg.GetAllNodesInfo()
11947     self.group = self.cfg.GetNodeGroup(self.group_uuid)
11948     instance_data = self.cfg.GetAllInstancesInfo()
11949
11950     if self.group is None:
11951       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11952                                (self.op.group_name, self.group_uuid))
11953
11954     (new_splits, previous_splits) = \
11955       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11956                                              for node in self.op.nodes],
11957                                             self.node_data, instance_data)
11958
11959     if new_splits:
11960       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11961
11962       if not self.op.force:
11963         raise errors.OpExecError("The following instances get split by this"
11964                                  " change and --force was not given: %s" %
11965                                  fmt_new_splits)
11966       else:
11967         self.LogWarning("This operation will split the following instances: %s",
11968                         fmt_new_splits)
11969
11970         if previous_splits:
11971           self.LogWarning("In addition, these already-split instances continue"
11972                           " to be split across groups: %s",
11973                           utils.CommaJoin(utils.NiceSort(previous_splits)))
11974
11975   def Exec(self, feedback_fn):
11976     """Assign nodes to a new group.
11977
11978     """
11979     for node in self.op.nodes:
11980       self.node_data[node].group = self.group_uuid
11981
11982     # FIXME: Depends on side-effects of modifying the result of
11983     # C{cfg.GetAllNodesInfo}
11984
11985     self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11986
11987   @staticmethod
11988   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11989     """Check for split instances after a node assignment.
11990
11991     This method considers a series of node assignments as an atomic operation,
11992     and returns information about split instances after applying the set of
11993     changes.
11994
11995     In particular, it returns information about newly split instances, and
11996     instances that were already split, and remain so after the change.
11997
11998     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11999     considered.
12000
12001     @type changes: list of (node_name, new_group_uuid) pairs.
12002     @param changes: list of node assignments to consider.
12003     @param node_data: a dict with data for all nodes
12004     @param instance_data: a dict with all instances to consider
12005     @rtype: a two-tuple
12006     @return: a list of instances that were previously okay and result split as a
12007       consequence of this change, and a list of instances that were previously
12008       split and this change does not fix.
12009
12010     """
12011     changed_nodes = dict((node, group) for node, group in changes
12012                          if node_data[node].group != group)
12013
12014     all_split_instances = set()
12015     previously_split_instances = set()
12016
12017     def InstanceNodes(instance):
12018       return [instance.primary_node] + list(instance.secondary_nodes)
12019
12020     for inst in instance_data.values():
12021       if inst.disk_template not in constants.DTS_INT_MIRROR:
12022         continue
12023
12024       instance_nodes = InstanceNodes(inst)
12025
12026       if len(set(node_data[node].group for node in instance_nodes)) > 1:
12027         previously_split_instances.add(inst.name)
12028
12029       if len(set(changed_nodes.get(node, node_data[node].group)
12030                  for node in instance_nodes)) > 1:
12031         all_split_instances.add(inst.name)
12032
12033     return (list(all_split_instances - previously_split_instances),
12034             list(previously_split_instances & all_split_instances))
12035
12036
12037 class _GroupQuery(_QueryBase):
12038   FIELDS = query.GROUP_FIELDS
12039
12040   def ExpandNames(self, lu):
12041     lu.needed_locks = {}
12042
12043     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12044     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12045
12046     if not self.names:
12047       self.wanted = [name_to_uuid[name]
12048                      for name in utils.NiceSort(name_to_uuid.keys())]
12049     else:
12050       # Accept names to be either names or UUIDs.
12051       missing = []
12052       self.wanted = []
12053       all_uuid = frozenset(self._all_groups.keys())
12054
12055       for name in self.names:
12056         if name in all_uuid:
12057           self.wanted.append(name)
12058         elif name in name_to_uuid:
12059           self.wanted.append(name_to_uuid[name])
12060         else:
12061           missing.append(name)
12062
12063       if missing:
12064         raise errors.OpPrereqError("Some groups do not exist: %s" %
12065                                    utils.CommaJoin(missing),
12066                                    errors.ECODE_NOENT)
12067
12068   def DeclareLocks(self, lu, level):
12069     pass
12070
12071   def _GetQueryData(self, lu):
12072     """Computes the list of node groups and their attributes.
12073
12074     """
12075     do_nodes = query.GQ_NODE in self.requested_data
12076     do_instances = query.GQ_INST in self.requested_data
12077
12078     group_to_nodes = None
12079     group_to_instances = None
12080
12081     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12082     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12083     # latter GetAllInstancesInfo() is not enough, for we have to go through
12084     # instance->node. Hence, we will need to process nodes even if we only need
12085     # instance information.
12086     if do_nodes or do_instances:
12087       all_nodes = lu.cfg.GetAllNodesInfo()
12088       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12089       node_to_group = {}
12090
12091       for node in all_nodes.values():
12092         if node.group in group_to_nodes:
12093           group_to_nodes[node.group].append(node.name)
12094           node_to_group[node.name] = node.group
12095
12096       if do_instances:
12097         all_instances = lu.cfg.GetAllInstancesInfo()
12098         group_to_instances = dict((uuid, []) for uuid in self.wanted)
12099
12100         for instance in all_instances.values():
12101           node = instance.primary_node
12102           if node in node_to_group:
12103             group_to_instances[node_to_group[node]].append(instance.name)
12104
12105         if not do_nodes:
12106           # Do not pass on node information if it was not requested.
12107           group_to_nodes = None
12108
12109     return query.GroupQueryData([self._all_groups[uuid]
12110                                  for uuid in self.wanted],
12111                                 group_to_nodes, group_to_instances)
12112
12113
12114 class LUGroupQuery(NoHooksLU):
12115   """Logical unit for querying node groups.
12116
12117   """
12118   REQ_BGL = False
12119
12120   def CheckArguments(self):
12121     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12122                           self.op.output_fields, False)
12123
12124   def ExpandNames(self):
12125     self.gq.ExpandNames(self)
12126
12127   def DeclareLocks(self, level):
12128     self.gq.DeclareLocks(self, level)
12129
12130   def Exec(self, feedback_fn):
12131     return self.gq.OldStyleQuery(self)
12132
12133
12134 class LUGroupSetParams(LogicalUnit):
12135   """Modifies the parameters of a node group.
12136
12137   """
12138   HPATH = "group-modify"
12139   HTYPE = constants.HTYPE_GROUP
12140   REQ_BGL = False
12141
12142   def CheckArguments(self):
12143     all_changes = [
12144       self.op.ndparams,
12145       self.op.alloc_policy,
12146       ]
12147
12148     if all_changes.count(None) == len(all_changes):
12149       raise errors.OpPrereqError("Please pass at least one modification",
12150                                  errors.ECODE_INVAL)
12151
12152   def ExpandNames(self):
12153     # This raises errors.OpPrereqError on its own:
12154     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12155
12156     self.needed_locks = {
12157       locking.LEVEL_NODEGROUP: [self.group_uuid],
12158       }
12159
12160   def CheckPrereq(self):
12161     """Check prerequisites.
12162
12163     """
12164     self.group = self.cfg.GetNodeGroup(self.group_uuid)
12165
12166     if self.group is None:
12167       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12168                                (self.op.group_name, self.group_uuid))
12169
12170     if self.op.ndparams:
12171       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12172       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12173       self.new_ndparams = new_ndparams
12174
12175   def BuildHooksEnv(self):
12176     """Build hooks env.
12177
12178     """
12179     return {
12180       "GROUP_NAME": self.op.group_name,
12181       "NEW_ALLOC_POLICY": self.op.alloc_policy,
12182       }
12183
12184   def BuildHooksNodes(self):
12185     """Build hooks nodes.
12186
12187     """
12188     mn = self.cfg.GetMasterNode()
12189     return ([mn], [mn])
12190
12191   def Exec(self, feedback_fn):
12192     """Modifies the node group.
12193
12194     """
12195     result = []
12196
12197     if self.op.ndparams:
12198       self.group.ndparams = self.new_ndparams
12199       result.append(("ndparams", str(self.group.ndparams)))
12200
12201     if self.op.alloc_policy:
12202       self.group.alloc_policy = self.op.alloc_policy
12203
12204     self.cfg.Update(self.group, feedback_fn)
12205     return result
12206
12207
12208 class LUGroupRemove(LogicalUnit):
12209   HPATH = "group-remove"
12210   HTYPE = constants.HTYPE_GROUP
12211   REQ_BGL = False
12212
12213   def ExpandNames(self):
12214     # This will raises errors.OpPrereqError on its own:
12215     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12216     self.needed_locks = {
12217       locking.LEVEL_NODEGROUP: [self.group_uuid],
12218       }
12219
12220   def CheckPrereq(self):
12221     """Check prerequisites.
12222
12223     This checks that the given group name exists as a node group, that is
12224     empty (i.e., contains no nodes), and that is not the last group of the
12225     cluster.
12226
12227     """
12228     # Verify that the group is empty.
12229     group_nodes = [node.name
12230                    for node in self.cfg.GetAllNodesInfo().values()
12231                    if node.group == self.group_uuid]
12232
12233     if group_nodes:
12234       raise errors.OpPrereqError("Group '%s' not empty, has the following"
12235                                  " nodes: %s" %
12236                                  (self.op.group_name,
12237                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
12238                                  errors.ECODE_STATE)
12239
12240     # Verify the cluster would not be left group-less.
12241     if len(self.cfg.GetNodeGroupList()) == 1:
12242       raise errors.OpPrereqError("Group '%s' is the only group,"
12243                                  " cannot be removed" %
12244                                  self.op.group_name,
12245                                  errors.ECODE_STATE)
12246
12247   def BuildHooksEnv(self):
12248     """Build hooks env.
12249
12250     """
12251     return {
12252       "GROUP_NAME": self.op.group_name,
12253       }
12254
12255   def BuildHooksNodes(self):
12256     """Build hooks nodes.
12257
12258     """
12259     mn = self.cfg.GetMasterNode()
12260     return ([mn], [mn])
12261
12262   def Exec(self, feedback_fn):
12263     """Remove the node group.
12264
12265     """
12266     try:
12267       self.cfg.RemoveNodeGroup(self.group_uuid)
12268     except errors.ConfigurationError:
12269       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12270                                (self.op.group_name, self.group_uuid))
12271
12272     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12273
12274
12275 class LUGroupRename(LogicalUnit):
12276   HPATH = "group-rename"
12277   HTYPE = constants.HTYPE_GROUP
12278   REQ_BGL = False
12279
12280   def ExpandNames(self):
12281     # This raises errors.OpPrereqError on its own:
12282     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12283
12284     self.needed_locks = {
12285       locking.LEVEL_NODEGROUP: [self.group_uuid],
12286       }
12287
12288   def CheckPrereq(self):
12289     """Check prerequisites.
12290
12291     Ensures requested new name is not yet used.
12292
12293     """
12294     try:
12295       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12296     except errors.OpPrereqError:
12297       pass
12298     else:
12299       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12300                                  " node group (UUID: %s)" %
12301                                  (self.op.new_name, new_name_uuid),
12302                                  errors.ECODE_EXISTS)
12303
12304   def BuildHooksEnv(self):
12305     """Build hooks env.
12306
12307     """
12308     return {
12309       "OLD_NAME": self.op.group_name,
12310       "NEW_NAME": self.op.new_name,
12311       }
12312
12313   def BuildHooksNodes(self):
12314     """Build hooks nodes.
12315
12316     """
12317     mn = self.cfg.GetMasterNode()
12318
12319     all_nodes = self.cfg.GetAllNodesInfo()
12320     all_nodes.pop(mn, None)
12321
12322     run_nodes = [mn]
12323     run_nodes.extend(node.name for node in all_nodes.values()
12324                      if node.group == self.group_uuid)
12325
12326     return (run_nodes, run_nodes)
12327
12328   def Exec(self, feedback_fn):
12329     """Rename the node group.
12330
12331     """
12332     group = self.cfg.GetNodeGroup(self.group_uuid)
12333
12334     if group is None:
12335       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12336                                (self.op.group_name, self.group_uuid))
12337
12338     group.name = self.op.new_name
12339     self.cfg.Update(group, feedback_fn)
12340
12341     return self.op.new_name
12342
12343
12344 class LUGroupEvacuate(LogicalUnit):
12345   HPATH = "group-evacuate"
12346   HTYPE = constants.HTYPE_GROUP
12347   REQ_BGL = False
12348
12349   def ExpandNames(self):
12350     # This raises errors.OpPrereqError on its own:
12351     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12352
12353     if self.op.target_groups:
12354       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12355                                   self.op.target_groups)
12356     else:
12357       self.req_target_uuids = []
12358
12359     if self.group_uuid in self.req_target_uuids:
12360       raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12361                                  " as a target group (targets are %s)" %
12362                                  (self.group_uuid,
12363                                   utils.CommaJoin(self.req_target_uuids)),
12364                                  errors.ECODE_INVAL)
12365
12366     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12367
12368     self.share_locks = _ShareAll()
12369     self.needed_locks = {
12370       locking.LEVEL_INSTANCE: [],
12371       locking.LEVEL_NODEGROUP: [],
12372       locking.LEVEL_NODE: [],
12373       }
12374
12375   def DeclareLocks(self, level):
12376     if level == locking.LEVEL_INSTANCE:
12377       assert not self.needed_locks[locking.LEVEL_INSTANCE]
12378
12379       # Lock instances optimistically, needs verification once node and group
12380       # locks have been acquired
12381       self.needed_locks[locking.LEVEL_INSTANCE] = \
12382         self.cfg.GetNodeGroupInstances(self.group_uuid)
12383
12384     elif level == locking.LEVEL_NODEGROUP:
12385       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12386
12387       if self.req_target_uuids:
12388         lock_groups = set([self.group_uuid] + self.req_target_uuids)
12389
12390         # Lock all groups used by instances optimistically; this requires going
12391         # via the node before it's locked, requiring verification later on
12392         lock_groups.update(group_uuid
12393                            for instance_name in
12394                              self.owned_locks(locking.LEVEL_INSTANCE)
12395                            for group_uuid in
12396                              self.cfg.GetInstanceNodeGroups(instance_name))
12397       else:
12398         # No target groups, need to lock all of them
12399         lock_groups = locking.ALL_SET
12400
12401       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12402
12403     elif level == locking.LEVEL_NODE:
12404       # This will only lock the nodes in the group to be evacuated which
12405       # contain actual instances
12406       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12407       self._LockInstancesNodes()
12408
12409       # Lock all nodes in group to be evacuated and target groups
12410       owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12411       assert self.group_uuid in owned_groups
12412       member_nodes = [node_name
12413                       for group in owned_groups
12414                       for node_name in self.cfg.GetNodeGroup(group).members]
12415       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12416
12417   def CheckPrereq(self):
12418     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12419     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12420     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12421
12422     assert owned_groups.issuperset(self.req_target_uuids)
12423     assert self.group_uuid in owned_groups
12424
12425     # Check if locked instances are still correct
12426     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12427
12428     # Get instance information
12429     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12430
12431     # Check if node groups for locked instances are still correct
12432     for instance_name in owned_instances:
12433       inst = self.instances[instance_name]
12434       assert owned_nodes.issuperset(inst.all_nodes), \
12435         "Instance %s's nodes changed while we kept the lock" % instance_name
12436
12437       inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12438                                              owned_groups)
12439
12440       assert self.group_uuid in inst_groups, \
12441         "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12442
12443     if self.req_target_uuids:
12444       # User requested specific target groups
12445       self.target_uuids = self.req_target_uuids
12446     else:
12447       # All groups except the one to be evacuated are potential targets
12448       self.target_uuids = [group_uuid for group_uuid in owned_groups
12449                            if group_uuid != self.group_uuid]
12450
12451       if not self.target_uuids:
12452         raise errors.OpPrereqError("There are no possible target groups",
12453                                    errors.ECODE_INVAL)
12454
12455   def BuildHooksEnv(self):
12456     """Build hooks env.
12457
12458     """
12459     return {
12460       "GROUP_NAME": self.op.group_name,
12461       "TARGET_GROUPS": " ".join(self.target_uuids),
12462       }
12463
12464   def BuildHooksNodes(self):
12465     """Build hooks nodes.
12466
12467     """
12468     mn = self.cfg.GetMasterNode()
12469
12470     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12471
12472     run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12473
12474     return (run_nodes, run_nodes)
12475
12476   def Exec(self, feedback_fn):
12477     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12478
12479     assert self.group_uuid not in self.target_uuids
12480
12481     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12482                      instances=instances, target_groups=self.target_uuids)
12483
12484     ial.Run(self.op.iallocator)
12485
12486     if not ial.success:
12487       raise errors.OpPrereqError("Can't compute group evacuation using"
12488                                  " iallocator '%s': %s" %
12489                                  (self.op.iallocator, ial.info),
12490                                  errors.ECODE_NORES)
12491
12492     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12493
12494     self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12495                  len(jobs), self.op.group_name)
12496
12497     return ResultWithJobs(jobs)
12498
12499
12500 class TagsLU(NoHooksLU): # pylint: disable=W0223
12501   """Generic tags LU.
12502
12503   This is an abstract class which is the parent of all the other tags LUs.
12504
12505   """
12506   def ExpandNames(self):
12507     self.group_uuid = None
12508     self.needed_locks = {}
12509     if self.op.kind == constants.TAG_NODE:
12510       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12511       self.needed_locks[locking.LEVEL_NODE] = self.op.name
12512     elif self.op.kind == constants.TAG_INSTANCE:
12513       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12514       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12515     elif self.op.kind == constants.TAG_NODEGROUP:
12516       self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12517
12518     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12519     # not possible to acquire the BGL based on opcode parameters)
12520
12521   def CheckPrereq(self):
12522     """Check prerequisites.
12523
12524     """
12525     if self.op.kind == constants.TAG_CLUSTER:
12526       self.target = self.cfg.GetClusterInfo()
12527     elif self.op.kind == constants.TAG_NODE:
12528       self.target = self.cfg.GetNodeInfo(self.op.name)
12529     elif self.op.kind == constants.TAG_INSTANCE:
12530       self.target = self.cfg.GetInstanceInfo(self.op.name)
12531     elif self.op.kind == constants.TAG_NODEGROUP:
12532       self.target = self.cfg.GetNodeGroup(self.group_uuid)
12533     else:
12534       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12535                                  str(self.op.kind), errors.ECODE_INVAL)
12536
12537
12538 class LUTagsGet(TagsLU):
12539   """Returns the tags of a given object.
12540
12541   """
12542   REQ_BGL = False
12543
12544   def ExpandNames(self):
12545     TagsLU.ExpandNames(self)
12546
12547     # Share locks as this is only a read operation
12548     self.share_locks = _ShareAll()
12549
12550   def Exec(self, feedback_fn):
12551     """Returns the tag list.
12552
12553     """
12554     return list(self.target.GetTags())
12555
12556
12557 class LUTagsSearch(NoHooksLU):
12558   """Searches the tags for a given pattern.
12559
12560   """
12561   REQ_BGL = False
12562
12563   def ExpandNames(self):
12564     self.needed_locks = {}
12565
12566   def CheckPrereq(self):
12567     """Check prerequisites.
12568
12569     This checks the pattern passed for validity by compiling it.
12570
12571     """
12572     try:
12573       self.re = re.compile(self.op.pattern)
12574     except re.error, err:
12575       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12576                                  (self.op.pattern, err), errors.ECODE_INVAL)
12577
12578   def Exec(self, feedback_fn):
12579     """Returns the tag list.
12580
12581     """
12582     cfg = self.cfg
12583     tgts = [("/cluster", cfg.GetClusterInfo())]
12584     ilist = cfg.GetAllInstancesInfo().values()
12585     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12586     nlist = cfg.GetAllNodesInfo().values()
12587     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12588     tgts.extend(("/nodegroup/%s" % n.name, n)
12589                 for n in cfg.GetAllNodeGroupsInfo().values())
12590     results = []
12591     for path, target in tgts:
12592       for tag in target.GetTags():
12593         if self.re.search(tag):
12594           results.append((path, tag))
12595     return results
12596
12597
12598 class LUTagsSet(TagsLU):
12599   """Sets a tag on a given object.
12600
12601   """
12602   REQ_BGL = False
12603
12604   def CheckPrereq(self):
12605     """Check prerequisites.
12606
12607     This checks the type and length of the tag name and value.
12608
12609     """
12610     TagsLU.CheckPrereq(self)
12611     for tag in self.op.tags:
12612       objects.TaggableObject.ValidateTag(tag)
12613
12614   def Exec(self, feedback_fn):
12615     """Sets the tag.
12616
12617     """
12618     try:
12619       for tag in self.op.tags:
12620         self.target.AddTag(tag)
12621     except errors.TagError, err:
12622       raise errors.OpExecError("Error while setting tag: %s" % str(err))
12623     self.cfg.Update(self.target, feedback_fn)
12624
12625
12626 class LUTagsDel(TagsLU):
12627   """Delete a list of tags from a given object.
12628
12629   """
12630   REQ_BGL = False
12631
12632   def CheckPrereq(self):
12633     """Check prerequisites.
12634
12635     This checks that we have the given tag.
12636
12637     """
12638     TagsLU.CheckPrereq(self)
12639     for tag in self.op.tags:
12640       objects.TaggableObject.ValidateTag(tag)
12641     del_tags = frozenset(self.op.tags)
12642     cur_tags = self.target.GetTags()
12643
12644     diff_tags = del_tags - cur_tags
12645     if diff_tags:
12646       diff_names = ("'%s'" % i for i in sorted(diff_tags))
12647       raise errors.OpPrereqError("Tag(s) %s not found" %
12648                                  (utils.CommaJoin(diff_names), ),
12649                                  errors.ECODE_NOENT)
12650
12651   def Exec(self, feedback_fn):
12652     """Remove the tag from the object.
12653
12654     """
12655     for tag in self.op.tags:
12656       self.target.RemoveTag(tag)
12657     self.cfg.Update(self.target, feedback_fn)
12658
12659
12660 class LUTestDelay(NoHooksLU):
12661   """Sleep for a specified amount of time.
12662
12663   This LU sleeps on the master and/or nodes for a specified amount of
12664   time.
12665
12666   """
12667   REQ_BGL = False
12668
12669   def ExpandNames(self):
12670     """Expand names and set required locks.
12671
12672     This expands the node list, if any.
12673
12674     """
12675     self.needed_locks = {}
12676     if self.op.on_nodes:
12677       # _GetWantedNodes can be used here, but is not always appropriate to use
12678       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12679       # more information.
12680       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12681       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12682
12683   def _TestDelay(self):
12684     """Do the actual sleep.
12685
12686     """
12687     if self.op.on_master:
12688       if not utils.TestDelay(self.op.duration):
12689         raise errors.OpExecError("Error during master delay test")
12690     if self.op.on_nodes:
12691       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12692       for node, node_result in result.items():
12693         node_result.Raise("Failure during rpc call to node %s" % node)
12694
12695   def Exec(self, feedback_fn):
12696     """Execute the test delay opcode, with the wanted repetitions.
12697
12698     """
12699     if self.op.repeat == 0:
12700       self._TestDelay()
12701     else:
12702       top_value = self.op.repeat - 1
12703       for i in range(self.op.repeat):
12704         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12705         self._TestDelay()
12706
12707
12708 class LUTestJqueue(NoHooksLU):
12709   """Utility LU to test some aspects of the job queue.
12710
12711   """
12712   REQ_BGL = False
12713
12714   # Must be lower than default timeout for WaitForJobChange to see whether it
12715   # notices changed jobs
12716   _CLIENT_CONNECT_TIMEOUT = 20.0
12717   _CLIENT_CONFIRM_TIMEOUT = 60.0
12718
12719   @classmethod
12720   def _NotifyUsingSocket(cls, cb, errcls):
12721     """Opens a Unix socket and waits for another program to connect.
12722
12723     @type cb: callable
12724     @param cb: Callback to send socket name to client
12725     @type errcls: class
12726     @param errcls: Exception class to use for errors
12727
12728     """
12729     # Using a temporary directory as there's no easy way to create temporary
12730     # sockets without writing a custom loop around tempfile.mktemp and
12731     # socket.bind
12732     tmpdir = tempfile.mkdtemp()
12733     try:
12734       tmpsock = utils.PathJoin(tmpdir, "sock")
12735
12736       logging.debug("Creating temporary socket at %s", tmpsock)
12737       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12738       try:
12739         sock.bind(tmpsock)
12740         sock.listen(1)
12741
12742         # Send details to client
12743         cb(tmpsock)
12744
12745         # Wait for client to connect before continuing
12746         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12747         try:
12748           (conn, _) = sock.accept()
12749         except socket.error, err:
12750           raise errcls("Client didn't connect in time (%s)" % err)
12751       finally:
12752         sock.close()
12753     finally:
12754       # Remove as soon as client is connected
12755       shutil.rmtree(tmpdir)
12756
12757     # Wait for client to close
12758     try:
12759       try:
12760         # pylint: disable=E1101
12761         # Instance of '_socketobject' has no ... member
12762         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12763         conn.recv(1)
12764       except socket.error, err:
12765         raise errcls("Client failed to confirm notification (%s)" % err)
12766     finally:
12767       conn.close()
12768
12769   def _SendNotification(self, test, arg, sockname):
12770     """Sends a notification to the client.
12771
12772     @type test: string
12773     @param test: Test name
12774     @param arg: Test argument (depends on test)
12775     @type sockname: string
12776     @param sockname: Socket path
12777
12778     """
12779     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12780
12781   def _Notify(self, prereq, test, arg):
12782     """Notifies the client of a test.
12783
12784     @type prereq: bool
12785     @param prereq: Whether this is a prereq-phase test
12786     @type test: string
12787     @param test: Test name
12788     @param arg: Test argument (depends on test)
12789
12790     """
12791     if prereq:
12792       errcls = errors.OpPrereqError
12793     else:
12794       errcls = errors.OpExecError
12795
12796     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12797                                                   test, arg),
12798                                    errcls)
12799
12800   def CheckArguments(self):
12801     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12802     self.expandnames_calls = 0
12803
12804   def ExpandNames(self):
12805     checkargs_calls = getattr(self, "checkargs_calls", 0)
12806     if checkargs_calls < 1:
12807       raise errors.ProgrammerError("CheckArguments was not called")
12808
12809     self.expandnames_calls += 1
12810
12811     if self.op.notify_waitlock:
12812       self._Notify(True, constants.JQT_EXPANDNAMES, None)
12813
12814     self.LogInfo("Expanding names")
12815
12816     # Get lock on master node (just to get a lock, not for a particular reason)
12817     self.needed_locks = {
12818       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12819       }
12820
12821   def Exec(self, feedback_fn):
12822     if self.expandnames_calls < 1:
12823       raise errors.ProgrammerError("ExpandNames was not called")
12824
12825     if self.op.notify_exec:
12826       self._Notify(False, constants.JQT_EXEC, None)
12827
12828     self.LogInfo("Executing")
12829
12830     if self.op.log_messages:
12831       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12832       for idx, msg in enumerate(self.op.log_messages):
12833         self.LogInfo("Sending log message %s", idx + 1)
12834         feedback_fn(constants.JQT_MSGPREFIX + msg)
12835         # Report how many test messages have been sent
12836         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12837
12838     if self.op.fail:
12839       raise errors.OpExecError("Opcode failure was requested")
12840
12841     return True
12842
12843
12844 class IAllocator(object):
12845   """IAllocator framework.
12846
12847   An IAllocator instance has three sets of attributes:
12848     - cfg that is needed to query the cluster
12849     - input data (all members of the _KEYS class attribute are required)
12850     - four buffer attributes (in|out_data|text), that represent the
12851       input (to the external script) in text and data structure format,
12852       and the output from it, again in two formats
12853     - the result variables from the script (success, info, nodes) for
12854       easy usage
12855
12856   """
12857   # pylint: disable=R0902
12858   # lots of instance attributes
12859
12860   def __init__(self, cfg, rpc, mode, **kwargs):
12861     self.cfg = cfg
12862     self.rpc = rpc
12863     # init buffer variables
12864     self.in_text = self.out_text = self.in_data = self.out_data = None
12865     # init all input fields so that pylint is happy
12866     self.mode = mode
12867     self.memory = self.disks = self.disk_template = None
12868     self.os = self.tags = self.nics = self.vcpus = None
12869     self.hypervisor = None
12870     self.relocate_from = None
12871     self.name = None
12872     self.instances = None
12873     self.evac_mode = None
12874     self.target_groups = []
12875     # computed fields
12876     self.required_nodes = None
12877     # init result fields
12878     self.success = self.info = self.result = None
12879
12880     try:
12881       (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12882     except KeyError:
12883       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12884                                    " IAllocator" % self.mode)
12885
12886     keyset = [n for (n, _) in keydata]
12887
12888     for key in kwargs:
12889       if key not in keyset:
12890         raise errors.ProgrammerError("Invalid input parameter '%s' to"
12891                                      " IAllocator" % key)
12892       setattr(self, key, kwargs[key])
12893
12894     for key in keyset:
12895       if key not in kwargs:
12896         raise errors.ProgrammerError("Missing input parameter '%s' to"
12897                                      " IAllocator" % key)
12898     self._BuildInputData(compat.partial(fn, self), keydata)
12899
12900   def _ComputeClusterData(self):
12901     """Compute the generic allocator input data.
12902
12903     This is the data that is independent of the actual operation.
12904
12905     """
12906     cfg = self.cfg
12907     cluster_info = cfg.GetClusterInfo()
12908     # cluster data
12909     data = {
12910       "version": constants.IALLOCATOR_VERSION,
12911       "cluster_name": cfg.GetClusterName(),
12912       "cluster_tags": list(cluster_info.GetTags()),
12913       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12914       # we don't have job IDs
12915       }
12916     ninfo = cfg.GetAllNodesInfo()
12917     iinfo = cfg.GetAllInstancesInfo().values()
12918     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12919
12920     # node data
12921     node_list = [n.name for n in ninfo.values() if n.vm_capable]
12922
12923     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12924       hypervisor_name = self.hypervisor
12925     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12926       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12927     else:
12928       hypervisor_name = cluster_info.enabled_hypervisors[0]
12929
12930     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12931                                         hypervisor_name)
12932     node_iinfo = \
12933       self.rpc.call_all_instances_info(node_list,
12934                                        cluster_info.enabled_hypervisors)
12935
12936     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12937
12938     config_ndata = self._ComputeBasicNodeData(ninfo)
12939     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12940                                                  i_list, config_ndata)
12941     assert len(data["nodes"]) == len(ninfo), \
12942         "Incomplete node data computed"
12943
12944     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12945
12946     self.in_data = data
12947
12948   @staticmethod
12949   def _ComputeNodeGroupData(cfg):
12950     """Compute node groups data.
12951
12952     """
12953     ng = dict((guuid, {
12954       "name": gdata.name,
12955       "alloc_policy": gdata.alloc_policy,
12956       })
12957       for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12958
12959     return ng
12960
12961   @staticmethod
12962   def _ComputeBasicNodeData(node_cfg):
12963     """Compute global node data.
12964
12965     @rtype: dict
12966     @returns: a dict of name: (node dict, node config)
12967
12968     """
12969     # fill in static (config-based) values
12970     node_results = dict((ninfo.name, {
12971       "tags": list(ninfo.GetTags()),
12972       "primary_ip": ninfo.primary_ip,
12973       "secondary_ip": ninfo.secondary_ip,
12974       "offline": ninfo.offline,
12975       "drained": ninfo.drained,
12976       "master_candidate": ninfo.master_candidate,
12977       "group": ninfo.group,
12978       "master_capable": ninfo.master_capable,
12979       "vm_capable": ninfo.vm_capable,
12980       })
12981       for ninfo in node_cfg.values())
12982
12983     return node_results
12984
12985   @staticmethod
12986   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
12987                               node_results):
12988     """Compute global node data.
12989
12990     @param node_results: the basic node structures as filled from the config
12991
12992     """
12993     # make a copy of the current dict
12994     node_results = dict(node_results)
12995     for nname, nresult in node_data.items():
12996       assert nname in node_results, "Missing basic data for node %s" % nname
12997       ninfo = node_cfg[nname]
12998
12999       if not (ninfo.offline or ninfo.drained):
13000         nresult.Raise("Can't get data for node %s" % nname)
13001         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13002                                 nname)
13003         remote_info = nresult.payload
13004
13005         for attr in ["memory_total", "memory_free", "memory_dom0",
13006                      "vg_size", "vg_free", "cpu_total"]:
13007           if attr not in remote_info:
13008             raise errors.OpExecError("Node '%s' didn't return attribute"
13009                                      " '%s'" % (nname, attr))
13010           if not isinstance(remote_info[attr], int):
13011             raise errors.OpExecError("Node '%s' returned invalid value"
13012                                      " for '%s': %s" %
13013                                      (nname, attr, remote_info[attr]))
13014         # compute memory used by primary instances
13015         i_p_mem = i_p_up_mem = 0
13016         for iinfo, beinfo in i_list:
13017           if iinfo.primary_node == nname:
13018             i_p_mem += beinfo[constants.BE_MEMORY]
13019             if iinfo.name not in node_iinfo[nname].payload:
13020               i_used_mem = 0
13021             else:
13022               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13023             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13024             remote_info["memory_free"] -= max(0, i_mem_diff)
13025
13026             if iinfo.admin_up:
13027               i_p_up_mem += beinfo[constants.BE_MEMORY]
13028
13029         # compute memory used by instances
13030         pnr_dyn = {
13031           "total_memory": remote_info["memory_total"],
13032           "reserved_memory": remote_info["memory_dom0"],
13033           "free_memory": remote_info["memory_free"],
13034           "total_disk": remote_info["vg_size"],
13035           "free_disk": remote_info["vg_free"],
13036           "total_cpus": remote_info["cpu_total"],
13037           "i_pri_memory": i_p_mem,
13038           "i_pri_up_memory": i_p_up_mem,
13039           }
13040         pnr_dyn.update(node_results[nname])
13041         node_results[nname] = pnr_dyn
13042
13043     return node_results
13044
13045   @staticmethod
13046   def _ComputeInstanceData(cluster_info, i_list):
13047     """Compute global instance data.
13048
13049     """
13050     instance_data = {}
13051     for iinfo, beinfo in i_list:
13052       nic_data = []
13053       for nic in iinfo.nics:
13054         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13055         nic_dict = {
13056           "mac": nic.mac,
13057           "ip": nic.ip,
13058           "mode": filled_params[constants.NIC_MODE],
13059           "link": filled_params[constants.NIC_LINK],
13060           }
13061         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13062           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13063         nic_data.append(nic_dict)
13064       pir = {
13065         "tags": list(iinfo.GetTags()),
13066         "admin_up": iinfo.admin_up,
13067         "vcpus": beinfo[constants.BE_VCPUS],
13068         "memory": beinfo[constants.BE_MEMORY],
13069         "os": iinfo.os,
13070         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13071         "nics": nic_data,
13072         "disks": [{constants.IDISK_SIZE: dsk.size,
13073                    constants.IDISK_MODE: dsk.mode}
13074                   for dsk in iinfo.disks],
13075         "disk_template": iinfo.disk_template,
13076         "hypervisor": iinfo.hypervisor,
13077         }
13078       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13079                                                  pir["disks"])
13080       instance_data[iinfo.name] = pir
13081
13082     return instance_data
13083
13084   def _AddNewInstance(self):
13085     """Add new instance data to allocator structure.
13086
13087     This in combination with _AllocatorGetClusterData will create the
13088     correct structure needed as input for the allocator.
13089
13090     The checks for the completeness of the opcode must have already been
13091     done.
13092
13093     """
13094     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13095
13096     if self.disk_template in constants.DTS_INT_MIRROR:
13097       self.required_nodes = 2
13098     else:
13099       self.required_nodes = 1
13100
13101     request = {
13102       "name": self.name,
13103       "disk_template": self.disk_template,
13104       "tags": self.tags,
13105       "os": self.os,
13106       "vcpus": self.vcpus,
13107       "memory": self.memory,
13108       "disks": self.disks,
13109       "disk_space_total": disk_space,
13110       "nics": self.nics,
13111       "required_nodes": self.required_nodes,
13112       "hypervisor": self.hypervisor,
13113       }
13114
13115     return request
13116
13117   def _AddRelocateInstance(self):
13118     """Add relocate instance data to allocator structure.
13119
13120     This in combination with _IAllocatorGetClusterData will create the
13121     correct structure needed as input for the allocator.
13122
13123     The checks for the completeness of the opcode must have already been
13124     done.
13125
13126     """
13127     instance = self.cfg.GetInstanceInfo(self.name)
13128     if instance is None:
13129       raise errors.ProgrammerError("Unknown instance '%s' passed to"
13130                                    " IAllocator" % self.name)
13131
13132     if instance.disk_template not in constants.DTS_MIRRORED:
13133       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13134                                  errors.ECODE_INVAL)
13135
13136     if instance.disk_template in constants.DTS_INT_MIRROR and \
13137         len(instance.secondary_nodes) != 1:
13138       raise errors.OpPrereqError("Instance has not exactly one secondary node",
13139                                  errors.ECODE_STATE)
13140
13141     self.required_nodes = 1
13142     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13143     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13144
13145     request = {
13146       "name": self.name,
13147       "disk_space_total": disk_space,
13148       "required_nodes": self.required_nodes,
13149       "relocate_from": self.relocate_from,
13150       }
13151     return request
13152
13153   def _AddNodeEvacuate(self):
13154     """Get data for node-evacuate requests.
13155
13156     """
13157     return {
13158       "instances": self.instances,
13159       "evac_mode": self.evac_mode,
13160       }
13161
13162   def _AddChangeGroup(self):
13163     """Get data for node-evacuate requests.
13164
13165     """
13166     return {
13167       "instances": self.instances,
13168       "target_groups": self.target_groups,
13169       }
13170
13171   def _BuildInputData(self, fn, keydata):
13172     """Build input data structures.
13173
13174     """
13175     self._ComputeClusterData()
13176
13177     request = fn()
13178     request["type"] = self.mode
13179     for keyname, keytype in keydata:
13180       if keyname not in request:
13181         raise errors.ProgrammerError("Request parameter %s is missing" %
13182                                      keyname)
13183       val = request[keyname]
13184       if not keytype(val):
13185         raise errors.ProgrammerError("Request parameter %s doesn't pass"
13186                                      " validation, value %s, expected"
13187                                      " type %s" % (keyname, val, keytype))
13188     self.in_data["request"] = request
13189
13190     self.in_text = serializer.Dump(self.in_data)
13191
13192   _STRING_LIST = ht.TListOf(ht.TString)
13193   _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13194      # pylint: disable=E1101
13195      # Class '...' has no 'OP_ID' member
13196      "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13197                           opcodes.OpInstanceMigrate.OP_ID,
13198                           opcodes.OpInstanceReplaceDisks.OP_ID])
13199      })))
13200
13201   _NEVAC_MOVED = \
13202     ht.TListOf(ht.TAnd(ht.TIsLength(3),
13203                        ht.TItems([ht.TNonEmptyString,
13204                                   ht.TNonEmptyString,
13205                                   ht.TListOf(ht.TNonEmptyString),
13206                                  ])))
13207   _NEVAC_FAILED = \
13208     ht.TListOf(ht.TAnd(ht.TIsLength(2),
13209                        ht.TItems([ht.TNonEmptyString,
13210                                   ht.TMaybeString,
13211                                  ])))
13212   _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13213                           ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
13214
13215   _MODE_DATA = {
13216     constants.IALLOCATOR_MODE_ALLOC:
13217       (_AddNewInstance,
13218        [
13219         ("name", ht.TString),
13220         ("memory", ht.TInt),
13221         ("disks", ht.TListOf(ht.TDict)),
13222         ("disk_template", ht.TString),
13223         ("os", ht.TString),
13224         ("tags", _STRING_LIST),
13225         ("nics", ht.TListOf(ht.TDict)),
13226         ("vcpus", ht.TInt),
13227         ("hypervisor", ht.TString),
13228         ], ht.TList),
13229     constants.IALLOCATOR_MODE_RELOC:
13230       (_AddRelocateInstance,
13231        [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13232        ht.TList),
13233      constants.IALLOCATOR_MODE_NODE_EVAC:
13234       (_AddNodeEvacuate, [
13235         ("instances", _STRING_LIST),
13236         ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13237         ], _NEVAC_RESULT),
13238      constants.IALLOCATOR_MODE_CHG_GROUP:
13239       (_AddChangeGroup, [
13240         ("instances", _STRING_LIST),
13241         ("target_groups", _STRING_LIST),
13242         ], _NEVAC_RESULT),
13243     }
13244
13245   def Run(self, name, validate=True, call_fn=None):
13246     """Run an instance allocator and return the results.
13247
13248     """
13249     if call_fn is None:
13250       call_fn = self.rpc.call_iallocator_runner
13251
13252     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13253     result.Raise("Failure while running the iallocator script")
13254
13255     self.out_text = result.payload
13256     if validate:
13257       self._ValidateResult()
13258
13259   def _ValidateResult(self):
13260     """Process the allocator results.
13261
13262     This will process and if successful save the result in
13263     self.out_data and the other parameters.
13264
13265     """
13266     try:
13267       rdict = serializer.Load(self.out_text)
13268     except Exception, err:
13269       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13270
13271     if not isinstance(rdict, dict):
13272       raise errors.OpExecError("Can't parse iallocator results: not a dict")
13273
13274     # TODO: remove backwards compatiblity in later versions
13275     if "nodes" in rdict and "result" not in rdict:
13276       rdict["result"] = rdict["nodes"]
13277       del rdict["nodes"]
13278
13279     for key in "success", "info", "result":
13280       if key not in rdict:
13281         raise errors.OpExecError("Can't parse iallocator results:"
13282                                  " missing key '%s'" % key)
13283       setattr(self, key, rdict[key])
13284
13285     if not self._result_check(self.result):
13286       raise errors.OpExecError("Iallocator returned invalid result,"
13287                                " expected %s, got %s" %
13288                                (self._result_check, self.result),
13289                                errors.ECODE_INVAL)
13290
13291     if self.mode == constants.IALLOCATOR_MODE_RELOC:
13292       assert self.relocate_from is not None
13293       assert self.required_nodes == 1
13294
13295       node2group = dict((name, ndata["group"])
13296                         for (name, ndata) in self.in_data["nodes"].items())
13297
13298       fn = compat.partial(self._NodesToGroups, node2group,
13299                           self.in_data["nodegroups"])
13300
13301       instance = self.cfg.GetInstanceInfo(self.name)
13302       request_groups = fn(self.relocate_from + [instance.primary_node])
13303       result_groups = fn(rdict["result"] + [instance.primary_node])
13304
13305       if self.success and not set(result_groups).issubset(request_groups):
13306         raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13307                                  " differ from original groups (%s)" %
13308                                  (utils.CommaJoin(result_groups),
13309                                   utils.CommaJoin(request_groups)))
13310
13311     elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13312       assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13313
13314     self.out_data = rdict
13315
13316   @staticmethod
13317   def _NodesToGroups(node2group, groups, nodes):
13318     """Returns a list of unique group names for a list of nodes.
13319
13320     @type node2group: dict
13321     @param node2group: Map from node name to group UUID
13322     @type groups: dict
13323     @param groups: Group information
13324     @type nodes: list
13325     @param nodes: Node names
13326
13327     """
13328     result = set()
13329
13330     for node in nodes:
13331       try:
13332         group_uuid = node2group[node]
13333       except KeyError:
13334         # Ignore unknown node
13335         pass
13336       else:
13337         try:
13338           group = groups[group_uuid]
13339         except KeyError:
13340           # Can't find group, let's use UUID
13341           group_name = group_uuid
13342         else:
13343           group_name = group["name"]
13344
13345         result.add(group_name)
13346
13347     return sorted(result)
13348
13349
13350 class LUTestAllocator(NoHooksLU):
13351   """Run allocator tests.
13352
13353   This LU runs the allocator tests
13354
13355   """
13356   def CheckPrereq(self):
13357     """Check prerequisites.
13358
13359     This checks the opcode parameters depending on the director and mode test.
13360
13361     """
13362     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13363       for attr in ["memory", "disks", "disk_template",
13364                    "os", "tags", "nics", "vcpus"]:
13365         if not hasattr(self.op, attr):
13366           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13367                                      attr, errors.ECODE_INVAL)
13368       iname = self.cfg.ExpandInstanceName(self.op.name)
13369       if iname is not None:
13370         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13371                                    iname, errors.ECODE_EXISTS)
13372       if not isinstance(self.op.nics, list):
13373         raise errors.OpPrereqError("Invalid parameter 'nics'",
13374                                    errors.ECODE_INVAL)
13375       if not isinstance(self.op.disks, list):
13376         raise errors.OpPrereqError("Invalid parameter 'disks'",
13377                                    errors.ECODE_INVAL)
13378       for row in self.op.disks:
13379         if (not isinstance(row, dict) or
13380             constants.IDISK_SIZE not in row or
13381             not isinstance(row[constants.IDISK_SIZE], int) or
13382             constants.IDISK_MODE not in row or
13383             row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13384           raise errors.OpPrereqError("Invalid contents of the 'disks'"
13385                                      " parameter", errors.ECODE_INVAL)
13386       if self.op.hypervisor is None:
13387         self.op.hypervisor = self.cfg.GetHypervisorType()
13388     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13389       fname = _ExpandInstanceName(self.cfg, self.op.name)
13390       self.op.name = fname
13391       self.relocate_from = \
13392           list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
13393     elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
13394                           constants.IALLOCATOR_MODE_NODE_EVAC):
13395       if not self.op.instances:
13396         raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
13397       self.op.instances = _GetWantedInstances(self, self.op.instances)
13398     else:
13399       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
13400                                  self.op.mode, errors.ECODE_INVAL)
13401
13402     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13403       if self.op.allocator is None:
13404         raise errors.OpPrereqError("Missing allocator name",
13405                                    errors.ECODE_INVAL)
13406     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13407       raise errors.OpPrereqError("Wrong allocator test '%s'" %
13408                                  self.op.direction, errors.ECODE_INVAL)
13409
13410   def Exec(self, feedback_fn):
13411     """Run the allocator test.
13412
13413     """
13414     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13415       ial = IAllocator(self.cfg, self.rpc,
13416                        mode=self.op.mode,
13417                        name=self.op.name,
13418                        memory=self.op.memory,
13419                        disks=self.op.disks,
13420                        disk_template=self.op.disk_template,
13421                        os=self.op.os,
13422                        tags=self.op.tags,
13423                        nics=self.op.nics,
13424                        vcpus=self.op.vcpus,
13425                        hypervisor=self.op.hypervisor,
13426                        )
13427     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13428       ial = IAllocator(self.cfg, self.rpc,
13429                        mode=self.op.mode,
13430                        name=self.op.name,
13431                        relocate_from=list(self.relocate_from),
13432                        )
13433     elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13434       ial = IAllocator(self.cfg, self.rpc,
13435                        mode=self.op.mode,
13436                        instances=self.op.instances,
13437                        target_groups=self.op.target_groups)
13438     elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13439       ial = IAllocator(self.cfg, self.rpc,
13440                        mode=self.op.mode,
13441                        instances=self.op.instances,
13442                        evac_mode=self.op.evac_mode)
13443     else:
13444       raise errors.ProgrammerError("Uncatched mode %s in"
13445                                    " LUTestAllocator.Exec", self.op.mode)
13446
13447     if self.op.direction == constants.IALLOCATOR_DIR_IN:
13448       result = ial.in_text
13449     else:
13450       ial.Run(self.op.allocator, validate=False)
13451       result = ial.out_text
13452     return result
13453
13454
13455 #: Query type implementations
13456 _QUERY_IMPL = {
13457   constants.QR_INSTANCE: _InstanceQuery,
13458   constants.QR_NODE: _NodeQuery,
13459   constants.QR_GROUP: _GroupQuery,
13460   constants.QR_OS: _OsQuery,
13461   }
13462
13463 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
13464
13465
13466 def _GetQueryImplementation(name):
13467   """Returns the implemtnation for a query type.
13468
13469   @param name: Query type, must be one of L{constants.QR_VIA_OP}
13470
13471   """
13472   try:
13473     return _QUERY_IMPL[name]
13474   except KeyError:
13475     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
13476                                errors.ECODE_INVAL)