# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay to many lines in this module

import re
import logging
import copy
import itertools

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

import ganeti.masterd.instance # pylint: disable=W0611
67 """Data container for LU results with jobs.
69 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
71 contained in the C{jobs} attribute and include the job IDs in the opcode
75 def __init__(self, jobs, **kwargs):
76 """Initializes this class.
78 Additional return values can be specified as keyword arguments.
80 @type jobs: list of lists of L{opcode.OpCode}
81 @param jobs: A list of lists of opcode objects
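
# Illustrative sketch (not part of the module): an LU can hand work off to
# new jobs by returning a ResultWithJobs from Exec; "warnings" below is an
# arbitrary example keyword, not an API name.
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpClusterVerifyConfig()]]   # one job with one opcode
#     return ResultWithJobs(jobs, warnings=[])
#
# mcpu.Processor._ProcessResult submits the job lists in C{jobs} and merges
# the resulting job IDs (plus any keyword arguments) into the opcode result.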


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values, following these rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and 'could
    # be a function' warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
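
# A minimal, hypothetical subclass following the rules in the LogicalUnit
# docstring; the class name and hook path are illustrative only:
#
#   class LUExampleNoop(LogicalUnit):
#     HPATH = "example-noop"
#     HTYPE = constants.HTYPE_CLUSTER
#
#     def ExpandNames(self):
#       self.needed_locks = {}          # no locks needed
#
#     def CheckPrereq(self):
#       pass                            # nothing to verify
#
#     def BuildHooksEnv(self):
#       return {"OP_TARGET": self.cfg.GetClusterName()}
#
#     def BuildHooksNodes(self):
#       return ([], [self.cfg.GetMasterNode()])
#
#     def Exec(self, feedback_fn):
#       feedback_fn("doing nothing")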


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
419 """Tasklet base class.
421 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
422 they can mix legacy code with tasklets. Locking needs to be done in the LU,
423 tasklets know nothing about locks.
425 Subclasses must follow these rules:
426 - Implement CheckPrereq
430 def __init__(self, lu):
437 def CheckPrereq(self):
438 """Check prerequisites for this tasklets.
440 This method should check whether the prerequisites for the execution of
441 this tasklet are fulfilled. It can do internode communication, but it
442 should be idempotent - no cluster or system changes are allowed.
444 The method should raise errors.OpPrereqError in case something is not
445 fulfilled. Its return value is ignored.
447 This method should also update all parameters to their canonical form if it
448 hasn't been done before.
453 def Exec(self, feedback_fn):
454 """Execute the tasklet.
456 This method should implement the actual work. It should raise
457 errors.OpExecError for failures that are somewhat dealt with in code, or
461 raise NotImplementedError
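
# Sketch of a hypothetical tasklet (illustrative name): locking stays in the
# owning LU, the tasklet only implements CheckPrereq and Exec.
#
#   class _ExampleTasklet(Tasklet):
#     def CheckPrereq(self):
#       self.master = self.cfg.GetMasterNode()
#
#     def Exec(self, feedback_fn):
#       feedback_fn("master node is %s" % self.master)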
465 """Base for query utility classes.
468 #: Attribute holding field definitions
471 def __init__(self, filter_, fields, use_locking):
472 """Initializes this class.
475 self.use_locking = use_locking
477 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
479 self.requested_data = self.query.RequestedData()
480 self.names = self.query.RequestedNames()
482 # Sort only if no names were requested
483 self.sort_by_name = not self.names
485 self.do_locking = None
488 def _GetNames(self, lu, all_names, lock_level):
489 """Helper function to determine names asked for in the query.
493 names = lu.owned_locks(lock_level)
497 if self.wanted == locking.ALL_SET:
498 assert not self.names
499 # caller didn't specify names, so ordering is not important
500 return utils.NiceSort(names)
502 # caller specified names and we must keep the same order
504 assert not self.do_locking or lu.glm.is_owned(lock_level)
506 missing = set(self.wanted).difference(names)
508 raise errors.OpExecError("Some items were removed before retrieving"
509 " their data: %s" % missing)
511 # Return expanded names
514 def ExpandNames(self, lu):
515 """Expand names for this query.
517 See L{LogicalUnit.ExpandNames}.
520 raise NotImplementedError()
522 def DeclareLocks(self, lu, level):
523 """Declare locks for this query.
525 See L{LogicalUnit.DeclareLocks}.
528 raise NotImplementedError()
530 def _GetQueryData(self, lu):
531 """Collects all data for this query.
533 @return: Query data object
536 raise NotImplementedError()
538 def NewStyleQuery(self, lu):
539 """Collect data and execute query.
542 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
543 sort_by_name=self.sort_by_name)
545 def OldStyleQuery(self, lu):
546 """Collect data and execute query.
549 return self.query.OldStyleQuery(self._GetQueryData(lu),
550 sort_by_name=self.sort_by_name)
554 """Returns a dict declaring all lock levels shared.
557 return dict.fromkeys(locking.LEVELS, 1)


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
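
# Behaviour sketch for _GetUpdatedParams (illustrative values):
#
#   >>> _GetUpdatedParams({"a": 1, "b": 2},
#   ...                   {"b": constants.VALUE_DEFAULT, "c": 3})
#   {'a': 1, 'c': 3}
#
# "b" is removed (reset to its default), "c" is added and "a" is kept; with
# use_none=True a C{None} value behaves like constants.VALUE_DEFAULT.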


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  if should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in lu.owned_locks(level):
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
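
# Usage sketch (illustrative): an LU done with all node locks except the
# instance's primary node could call
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[instance.primary_node])
#
# while passing neither names nor keep releases every lock at that level.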


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as
      value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
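
# Shape of the result (illustrative names): for an instance "inst1" with a
# logical volume "xenvg/disk0" on "node1" the map contains
#
#   {("node1", "xenvg/disk0"): "inst1"}
#
# which lets a (node, volume) pair found during verification be attributed
# back to the instance owning it.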


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = static.Copy()
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
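
# Sketch of the environment built for a one-NIC, one-disk instance
# (illustrative values; the hooks runner later prefixes every key with
# "GANETI_"):
#
#   {"OP_TARGET": "inst1", "INSTANCE_NAME": "inst1",
#    "INSTANCE_PRIMARY": "node1", "INSTANCE_SECONDARIES": "node2",
#    "INSTANCE_STATUS": "up",
#    "INSTANCE_NIC_COUNT": 1, "INSTANCE_NIC0_MODE": "bridged",
#    "INSTANCE_DISK_COUNT": 1, "INSTANCE_DISK0_SIZE": 1024, ...}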


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_up,
    "memory": bep[constants.BE_MEMORY],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
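
# Worked example (illustrative numbers): with candidate_pool_size = 10 and
# GetMasterCandidateStats reporting mc_now = 3, mc_should = 3, the new node
# raises mc_should to min(3 + 1, 10) = 4; since 3 < 4, we self-promote.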


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator")


def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
      apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data
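
# Shape of the returned list (illustrative entries):
#
#   [("cluster", "xen-pvm", {...cluster-level defaults...}),
#    ("os debian-image", "xen-pvm", {...defaults with OS overrides...}),
#    ("instance inst1", "kvm", {...fully filled instance parameters...})]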


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
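
  # Formatting sketch (illustrative): for ecode ("instance", "EINSTANCEDOWN"),
  # item "inst1" and message "instance not running", the output is
  #
  #   ERROR:EINSTANCEDOWN:instance:inst1:instance not running
  #
  # with error_codes enabled, and the human-oriented
  #
  #   ERROR: instance inst1: instance not running
  #
  # otherwise.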

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond


class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([opcodes.OpClusterVerifyConfig()])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
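
# Shape of the submitted jobs for a two-group cluster (illustrative): the
# config check is submitted first and each per-group job points back at it
# through a relative job dependency:
#
#   [[OpClusterVerifyConfig()],
#    [OpClusterVerifyGroup(group_name="group1", depends=[(-1, [])])],
#    [OpClusterVerifyGroup(group_name="group2", depends=[(-2, [])])]]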


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
                  "the following nodes (and their instances) belong to a"
                  " non-existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_nodes = set(self.group_info.members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)

    unlocked_nodes = \
      group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
      group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
      extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("Missing lock for nodes needed for the"
                                 " LV check: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
        reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, self.ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
               utils.CommaJoin(sorted(missing)))
1953 def _VerifyNodeNetwork(self, ninfo, nresult):
1954 """Check the node network connectivity results.
1956 @type ninfo: L{objects.Node}
1957 @param ninfo: the node to check
1958 @param nresult: the remote results for the node
1960 """
1961 node = ninfo.name
1962 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1964 test = constants.NV_NODELIST not in nresult
1965 _ErrorIf(test, self.ENODESSH, node,
1966 "node hasn't returned node ssh connectivity data")
1968 if nresult[constants.NV_NODELIST]:
1969 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1970 _ErrorIf(True, self.ENODESSH, node,
1971 "ssh communication with node '%s': %s", a_node, a_msg)
1973 test = constants.NV_NODENETTEST not in nresult
1974 _ErrorIf(test, self.ENODENET, node,
1975 "node hasn't returned node tcp connectivity data")
1977 if nresult[constants.NV_NODENETTEST]:
1978 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1979 for anode in nlist:
1980 _ErrorIf(True, self.ENODENET, node,
1981 "tcp communication with node '%s': %s",
1982 anode, nresult[constants.NV_NODENETTEST][anode])
1984 test = constants.NV_MASTERIP not in nresult
1985 _ErrorIf(test, self.ENODENET, node,
1986 "node hasn't returned node master IP reachability data")
1988 if not nresult[constants.NV_MASTERIP]:
1989 if node == self.master_node:
1990 msg = "the master node cannot reach the master IP (not configured?)"
1991 else:
1992 msg = "cannot reach the master IP"
1993 _ErrorIf(True, self.ENODENET, node, msg)
1995 def _VerifyInstance(self, instance, instanceconfig, node_image,
1996 diskstatus):
1997 """Verify an instance.
1999 This function checks to see if the required block devices are
2000 available on the instance's node.
2002 """
2003 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2004 node_current = instanceconfig.primary_node
2006 node_vol_should = {}
2007 instanceconfig.MapLVsByNode(node_vol_should)
2009 for node in node_vol_should:
2010 n_img = node_image[node]
2011 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2012 # ignore missing volumes on offline or broken nodes
2013 continue
2014 for volume in node_vol_should[node]:
2015 test = volume not in n_img.volumes
2016 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
2017 "volume %s missing on node %s", volume, node)
2019 if instanceconfig.admin_up:
2020 pri_img = node_image[node_current]
2021 test = instance not in pri_img.instances and not pri_img.offline
2022 _ErrorIf(test, self.EINSTANCEDOWN, instance,
2023 "instance not running on its primary node %s",
2024 node_current)
2026 diskdata = [(nname, success, status, idx)
2027 for (nname, disks) in diskstatus.items()
2028 for idx, (success, status) in enumerate(disks)]
2030 for nname, success, bdev_status, idx in diskdata:
2031 # the 'ghost node' construction in Exec() ensures that we have a
2032 # per-node set of results
2033 snode = node_image[nname]
2034 bad_snode = snode.ghost or snode.offline
2035 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2036 self.EINSTANCEFAULTYDISK, instance,
2037 "couldn't retrieve status for disk/%s on %s: %s",
2038 idx, nname, bdev_status)
2039 _ErrorIf((instanceconfig.admin_up and success and
2040 bdev_status.ldisk_status == constants.LDS_FAULTY),
2041 self.EINSTANCEFAULTYDISK, instance,
2042 "disk/%s on %s is faulty", idx, nname)
2044 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2045 """Verify if there are any unknown volumes in the cluster.
2047 The .os, .swap and backup volumes are ignored. All other volumes are
2048 reported as unknown.
2050 @type reserved: L{ganeti.utils.FieldSet}
2051 @param reserved: a FieldSet of reserved volume names
2053 """
2054 for node, n_img in node_image.items():
2055 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2056 # skip non-healthy nodes
2057 continue
2058 for volume in n_img.volumes:
2059 test = ((node not in node_vol_should or
2060 volume not in node_vol_should[node]) and
2061 not reserved.Matches(volume))
2062 self._ErrorIf(test, self.ENODEORPHANLV, node,
2063 "volume %s is unknown", volume)
2065 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2066 """Verify N+1 Memory Resilience.
2068 Check that if one single node dies we can still start all the
2069 instances it was primary for.
2071 """
2072 cluster_info = self.cfg.GetClusterInfo()
2073 for node, n_img in node_image.items():
2074 # This code checks that every node which is now listed as
2075 # secondary has enough memory to host all instances it is
2076 # supposed to should a single other node in the cluster fail.
2077 # FIXME: not ready for failover to an arbitrary node
2078 # FIXME: does not support file-backed instances
2079 # WARNING: we currently take into account down instances as well
2080 # as up ones, considering that even if they're down someone
2081 # might want to start them even in the event of a node failure.
2082 if n_img.offline:
2083 # we're skipping offline nodes from the N+1 warning, since
2084 # most likely we don't have good memory information from them;
2085 # we already list instances living on such nodes, and that's
2086 # enough warning
2087 continue
2088 for prinode, instances in n_img.sbp.items():
2089 needed_mem = 0
2090 for instance in instances:
2091 bep = cluster_info.FillBE(instance_cfg[instance])
2092 if bep[constants.BE_AUTO_BALANCE]:
2093 needed_mem += bep[constants.BE_MEMORY]
2094 test = n_img.mfree < needed_mem
2095 self._ErrorIf(test, self.ENODEN1, node,
2096 "not enough memory to accommodate instance failovers"
2097 " should node %s fail (%dMiB needed, %dMiB available)",
2098 prinode, needed_mem, n_img.mfree)
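# Editorial sketch (not part of the original module): the N+1 computation
# above, reduced to plain dicts. sbp maps a primary node name to the
# instances for which this node is secondary; be_params holds the filled
# auto_balance/memory backend parameters. Names are illustrative assumptions.
#
#   def n_plus_one_failures(sbp, be_params, mfree):
#     failures = []
#     for prinode, instances in sbp.items():
#       needed = sum(be_params[i]["memory"]
#                    for i in instances if be_params[i]["auto_balance"])
#       if mfree < needed:
#         failures.append((prinode, needed, mfree))
#     return failures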
2100 @classmethod
2101 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2102 (files_all, files_all_opt, files_mc, files_vm)):
2103 """Verifies file checksums collected from all nodes.
2105 @param errorif: Callback for reporting errors
2106 @param nodeinfo: List of L{objects.Node} objects
2107 @param master_node: Name of master node
2108 @param all_nvinfo: RPC results
2110 """
2111 node_names = frozenset(node.name for node in nodeinfo if not node.offline)
2113 assert master_node in node_names
2114 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2115 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2116 "Found file listed in more than one file list"
2118 # Define functions determining which nodes to consider for a file
2119 file2nodefn = dict([(filename, fn)
2120 for (files, fn) in [(files_all, None),
2121 (files_all_opt, None),
2122 (files_mc, lambda node: (node.master_candidate or
2123 node.name == master_node)),
2124 (files_vm, lambda node: node.vm_capable)]
2125 for filename in files])
2127 fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
2129 for node in nodeinfo:
2130 if node.offline:
2131 continue
2133 nresult = all_nvinfo[node.name]
2135 if nresult.fail_msg or not nresult.payload:
2136 node_files = None
2137 else:
2138 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2140 test = not (node_files and isinstance(node_files, dict))
2141 errorif(test, cls.ENODEFILECHECK, node.name,
2142 "Node did not return file checksum data")
2144 if test:
2145 continue
2146 for (filename, checksum) in node_files.items():
2147 # Check if the file should be considered for a node
2148 fn = file2nodefn[filename]
2149 if fn is None or fn(node):
2150 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2152 for (filename, checksums) in fileinfo.items():
2153 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2155 # Nodes having the file
2156 with_file = frozenset(node_name
2157 for nodes in fileinfo[filename].values()
2158 for node_name in nodes)
2160 # Nodes missing file
2161 missing_file = node_names - with_file
2163 if filename in files_all_opt:
2165 errorif(missing_file and missing_file != node_names,
2166 cls.ECLUSTERFILECHECK, None,
2167 "File %s is optional, but it must exist on all or no"
2168 " nodes (not found on %s)",
2169 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2170 else:
2171 errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2172 "File %s is missing from node(s) %s", filename,
2173 utils.CommaJoin(utils.NiceSort(missing_file)))
2175 # See if there are multiple versions of the file
2176 test = len(checksums) > 1
2177 if test:
2178 variants = ["variant %s on %s" %
2179 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2180 for (idx, (checksum, nodes)) in
2181 enumerate(sorted(checksums.items()))]
2182 else:
2183 variants = []
2185 errorif(test, cls.ECLUSTERFILECHECK, None,
2186 "File %s found with %s different checksums (%s)",
2187 filename, len(checksums), "; ".join(variants))
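# Editorial sketch (not part of the original module): the structure built
# above is {filename: {checksum: set(node names)}}; a single checksum key
# means consensus, several keys mean divergent copies. Illustrative only:
#
#   def file_variants(checksums):
#     # checksums: dict of checksum -> set of node names
#     return ["variant %s on %s" % (idx + 1, ", ".join(sorted(nodes)))
#             for idx, (_, nodes) in enumerate(sorted(checksums.items()))]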
2189 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2190 drbd_map):
2191 """Verifies the node DRBD status.
2193 @type ninfo: L{objects.Node}
2194 @param ninfo: the node to check
2195 @param nresult: the remote results for the node
2196 @param instanceinfo: the dict of instances
2197 @param drbd_helper: the configured DRBD usermode helper
2198 @param drbd_map: the DRBD map as returned by
2199 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2201 """
2202 node = ninfo.name
2203 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2205 if drbd_helper:
2206 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2207 test = (helper_result is None)
2208 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2209 "no drbd usermode helper returned")
2210 if helper_result:
2211 status, payload = helper_result
2212 test = not status
2213 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2214 "drbd usermode helper check unsuccessful: %s", payload)
2215 test = status and (payload != drbd_helper)
2216 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2217 "wrong drbd usermode helper: %s", payload)
2219 # compute the DRBD minors
2220 node_drbd = {}
2221 for minor, instance in drbd_map[node].items():
2222 test = instance not in instanceinfo
2223 _ErrorIf(test, self.ECLUSTERCFG, None,
2224 "ghost instance '%s' in temporary DRBD map", instance)
2225 # ghost instance should not be running, but otherwise we
2226 # don't give double warnings (both ghost instance and
2227 # unallocated minor in use)
2228 if test:
2229 node_drbd[minor] = (instance, False)
2230 else:
2231 instance = instanceinfo[instance]
2232 node_drbd[minor] = (instance.name, instance.admin_up)
2234 # and now check them
2235 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2236 test = not isinstance(used_minors, (tuple, list))
2237 _ErrorIf(test, self.ENODEDRBD, node,
2238 "cannot parse drbd status file: %s", str(used_minors))
2239 if test:
2240 # we cannot check drbd status
2241 return
2243 for minor, (iname, must_exist) in node_drbd.items():
2244 test = minor not in used_minors and must_exist
2245 _ErrorIf(test, self.ENODEDRBD, node,
2246 "drbd minor %d of instance %s is not active", minor, iname)
2247 for minor in used_minors:
2248 test = minor not in node_drbd
2249 _ErrorIf(test, self.ENODEDRBD, node,
2250 "unallocated drbd minor %d is in use", minor)
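# Editorial sketch (not part of the original module): the two-way comparison
# above between the configured minor map and the minors the node actually
# uses, as a standalone function. node_drbd and used_minors are shaped as
# built above; names are illustrative.
#
#   def drbd_mismatches(node_drbd, used_minors):
#     missing = [(minor, iname)
#                for minor, (iname, must) in node_drbd.items()
#                if must and minor not in used_minors]
#     unknown = [minor for minor in used_minors if minor not in node_drbd]
#     return (missing, unknown)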
2252 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2253 """Builds the node OS structures.
2255 @type ninfo: L{objects.Node}
2256 @param ninfo: the node to check
2257 @param nresult: the remote results for the node
2258 @param nimg: the node image object
2260 """
2261 node = ninfo.name
2262 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2264 remote_os = nresult.get(constants.NV_OSLIST, None)
2265 test = (not isinstance(remote_os, list) or
2266 not compat.all(isinstance(v, list) and len(v) == 7
2267 for v in remote_os))
2269 _ErrorIf(test, self.ENODEOS, node,
2270 "node hasn't returned valid OS data")
2272 nimg.os_fail = test
2273 if test:
2274 return
2276 os_dict = {}
2279 for (name, os_path, status, diagnose,
2280 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2282 if name not in os_dict:
2283 os_dict[name] = []
2285 # parameters is a list of lists instead of list of tuples due to
2286 # JSON lacking a real tuple type, fix it:
2287 parameters = [tuple(v) for v in parameters]
2288 os_dict[name].append((os_path, status, diagnose,
2289 set(variants), set(parameters), set(api_ver)))
2291 nimg.oslist = os_dict
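# Editorial sketch (not part of the original module): the tuple fix above is
# needed because a JSON round-trip turns tuples into lists. A standalone
# demonstration, assuming the stdlib json module as a stand-in for the
# project's serializer:
#
#   import json
#   params = json.loads(json.dumps([("kernel", "/boot/vmlinuz")]))
#   # params is now [[u'kernel', u'/boot/vmlinuz']] -- lists, not tuples
#   params = [tuple(v) for v in params]  # restore hashable tuples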
2293 def _VerifyNodeOS(self, ninfo, nimg, base):
2294 """Verifies the node OS list.
2296 @type ninfo: L{objects.Node}
2297 @param ninfo: the node to check
2298 @param nimg: the node image object
2299 @param base: the 'template' node we match against (e.g. from the master)
2301 """
2302 node = ninfo.name
2303 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2305 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2307 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2308 for os_name, os_data in nimg.oslist.items():
2309 assert os_data, "Empty OS status for OS %s?!" % os_name
2310 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2311 _ErrorIf(not f_status, self.ENODEOS, node,
2312 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2313 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2314 "OS '%s' has multiple entries (first one shadows the rest): %s",
2315 os_name, utils.CommaJoin([v[0] for v in os_data]))
2316 # comparisons with the 'base' image
2317 test = os_name not in base.oslist
2318 _ErrorIf(test, self.ENODEOS, node,
2319 "Extra OS %s not present on reference node (%s)",
2320 os_name, base.name)
2321 if test:
2322 continue
2323 assert base.oslist[os_name], "Base node has empty OS status?"
2324 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2325 if not b_status:
2326 # base OS is invalid, skipping
2327 continue
2328 for kind, a, b in [("API version", f_api, b_api),
2329 ("variants list", f_var, b_var),
2330 ("parameters", beautify_params(f_param),
2331 beautify_params(b_param))]:
2332 _ErrorIf(a != b, self.ENODEOS, node,
2333 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2334 kind, os_name, base.name,
2335 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2337 # check any missing OSes
2338 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2339 _ErrorIf(missing, self.ENODEOS, node,
2340 "OSes present on reference node %s but missing on this node: %s",
2341 base.name, utils.CommaJoin(missing))
2343 def _VerifyOob(self, ninfo, nresult):
2344 """Verifies out of band functionality of a node.
2346 @type ninfo: L{objects.Node}
2347 @param ninfo: the node to check
2348 @param nresult: the remote results for the node
2350 """
2351 node = ninfo.name
2352 # We just have to verify the paths on master and/or master candidates
2353 # as the oob helper is invoked on the master
2354 if ((ninfo.master_candidate or ninfo.master_capable) and
2355 constants.NV_OOB_PATHS in nresult):
2356 for path_result in nresult[constants.NV_OOB_PATHS]:
2357 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2359 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2360 """Verifies and updates the node volume data.
2362 This function will update a L{NodeImage}'s internal structures
2363 with data from the remote call.
2365 @type ninfo: L{objects.Node}
2366 @param ninfo: the node to check
2367 @param nresult: the remote results for the node
2368 @param nimg: the node image object
2369 @param vg_name: the configured VG name
2371 """
2372 node = ninfo.name
2373 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2375 nimg.lvm_fail = True
2376 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2377 if vg_name is None:
2378 pass
2379 elif isinstance(lvdata, basestring):
2380 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2381 utils.SafeEncode(lvdata))
2382 elif not isinstance(lvdata, dict):
2383 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2384 else:
2385 nimg.volumes = lvdata
2386 nimg.lvm_fail = False
2388 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2389 """Verifies and updates the node instance list.
2391 If the listing was successful, then updates this node's instance
2392 list. Otherwise, it marks the RPC call as failed for the instance
2393 list.
2395 @type ninfo: L{objects.Node}
2396 @param ninfo: the node to check
2397 @param nresult: the remote results for the node
2398 @param nimg: the node image object
2400 """
2401 idata = nresult.get(constants.NV_INSTANCELIST, None)
2402 test = not isinstance(idata, list)
2403 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2404 " (instancelist): %s", utils.SafeEncode(str(idata)))
2405 if test:
2406 nimg.hyp_fail = True
2407 else:
2408 nimg.instances = idata
2410 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2411 """Verifies and computes a node information map
2413 @type ninfo: L{objects.Node}
2414 @param ninfo: the node to check
2415 @param nresult: the remote results for the node
2416 @param nimg: the node image object
2417 @param vg_name: the configured VG name
2419 """
2420 node = ninfo.name
2421 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2423 # try to read free memory (from the hypervisor)
2424 hv_info = nresult.get(constants.NV_HVINFO, None)
2425 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2426 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2427 if not test:
2428 try:
2429 nimg.mfree = int(hv_info["memory_free"])
2430 except (ValueError, TypeError):
2431 _ErrorIf(True, self.ENODERPC, node,
2432 "node returned invalid nodeinfo, check hypervisor")
2434 # FIXME: devise a free space model for file based instances as well
2435 if vg_name is not None:
2436 test = (constants.NV_VGLIST not in nresult or
2437 vg_name not in nresult[constants.NV_VGLIST])
2438 _ErrorIf(test, self.ENODELVM, node,
2439 "node didn't return data for the volume group '%s'"
2440 " - it is either missing or broken", vg_name)
2441 if not test:
2442 try:
2443 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2444 except (ValueError, TypeError):
2445 _ErrorIf(True, self.ENODERPC, node,
2446 "node returned invalid LVM info, check LVM status")
2448 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2449 """Gets per-disk status information for all instances.
2451 @type nodelist: list of strings
2452 @param nodelist: Node names
2453 @type node_image: dict of (name, L{objects.Node})
2454 @param node_image: Node objects
2455 @type instanceinfo: dict of (name, L{objects.Instance})
2456 @param instanceinfo: Instance objects
2457 @rtype: {instance: {node: [(success, payload)]}}
2458 @return: a dictionary of per-instance dictionaries with nodes as
2459 keys and disk information as values; the disk information is a
2460 list of tuples (success, payload)
2463 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2465 node_disks = {}
2466 node_disks_devonly = {}
2467 diskless_instances = set()
2468 diskless = constants.DT_DISKLESS
2470 for nname in nodelist:
2471 node_instances = list(itertools.chain(node_image[nname].pinst,
2472 node_image[nname].sinst))
2473 diskless_instances.update(inst for inst in node_instances
2474 if instanceinfo[inst].disk_template == diskless)
2475 disks = [(inst, disk)
2476 for inst in node_instances
2477 for disk in instanceinfo[inst].disks]
2479 if not disks:
2480 # No need to collect data
2481 continue
2483 node_disks[nname] = disks
2485 # Creating copies as SetDiskID below will modify the objects and that can
2486 # lead to incorrect data returned from nodes
2487 devonly = [dev.Copy() for (_, dev) in disks]
2489 for dev in devonly:
2490 self.cfg.SetDiskID(dev, nname)
2492 node_disks_devonly[nname] = devonly
2494 assert len(node_disks) == len(node_disks_devonly)
2496 # Collect data from all nodes with disks
2497 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2498 node_disks_devonly)
2500 assert len(result) == len(node_disks)
2502 instdisk = {}
2504 for (nname, nres) in result.items():
2505 disks = node_disks[nname]
2507 if nres.offline:
2508 # No data from this node
2509 data = len(disks) * [(False, "node offline")]
2510 else:
2511 msg = nres.fail_msg
2512 _ErrorIf(msg, self.ENODERPC, nname,
2513 "while getting disk information: %s", msg)
2514 if msg:
2515 # No data from this node
2516 data = len(disks) * [(False, msg)]
2517 else:
2518 data = []
2519 for idx, i in enumerate(nres.payload):
2520 if isinstance(i, (tuple, list)) and len(i) == 2:
2521 data.append(i)
2522 else:
2523 logging.warning("Invalid result from node %s, entry %d: %s",
2524 nname, idx, i)
2525 data.append((False, "Invalid result from the remote node"))
2527 for ((inst, _), status) in zip(disks, data):
2528 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2530 # Add empty entries for diskless instances.
2531 for inst in diskless_instances:
2532 assert inst not in instdisk
2533 instdisk[inst] = {}
2535 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2536 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2537 compat.all(isinstance(s, (tuple, list)) and
2538 len(s) == 2 for s in statuses)
2539 for inst, nnames in instdisk.items()
2540 for nname, statuses in nnames.items())
2541 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2543 return instdisk
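# Editorial sketch (not part of the original module): the shape of the value
# returned above, plus a typical consumer loop. All data is illustrative.
#
#   instdisk = {
#     "inst1": {"node1": [(True, "sync"), (True, "sync")]},
#     "inst2": {},  # diskless instance: empty per-node dict
#     }
#   for inst, nodes in instdisk.items():
#     for node, statuses in nodes.items():
#       for idx, (success, payload) in enumerate(statuses):
#         pass  # per-disk handling goes here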
2545 @staticmethod
2546 def _SshNodeSelector(group_uuid, all_nodes):
2547 """Create endless iterators for all potential SSH check hosts.
2549 """
2550 nodes = [node for node in all_nodes
2551 if (node.group != group_uuid and
2552 not node.offline)]
2553 keyfunc = operator.attrgetter("group")
2555 return map(itertools.cycle,
2556 [sorted(map(operator.attrgetter("name"), names))
2557 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2558 keyfunc)])
2560 @classmethod
2561 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2562 """Choose which nodes should talk to which other nodes.
2564 We will make nodes contact all nodes in their group, and one node from
2565 every other group.
2567 @warning: This algorithm has a known issue if one node group is much
2568 smaller than others (e.g. just one node). In such a case all other
2569 nodes will talk to the single node.
2571 """
2572 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2573 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2575 return (online_nodes,
2576 dict((name, sorted([i.next() for i in sel]))
2577 for name in online_nodes))
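# Editorial sketch (not part of the original module): the selection above in
# miniature. Every online node gets one peer per foreign group, drawn from a
# per-group endless cycle so the SSH-check load is spread. Illustrative
# names only; relies on the Python 2 iterator protocol used above.
#
#   import itertools
#   def pick_peers(cycles_by_group):
#     # cycles_by_group: one itertools.cycle of node names per other group
#     return sorted(c.next() for c in cycles_by_group)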
2579 def BuildHooksEnv(self):
2580 """Build hooks env.
2582 Cluster-Verify hooks are run in the post phase only; their failure causes
2583 their output to be logged in the verify output and makes the verification fail.
2585 """
2586 env = {
2587 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2588 }
2590 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2591 for node in self.my_node_info.values())
2593 return env
2595 def BuildHooksNodes(self):
2596 """Build hooks nodes.
2598 """
2599 return ([], self.my_node_names)
2601 def Exec(self, feedback_fn):
2602 """Verify integrity of the node group, performing various tests on nodes.
2604 """
2605 # This method has too many local variables. pylint: disable=R0914
2606 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2608 if not self.my_node_names:
2610 feedback_fn("* Empty node group, skipping verification")
2611 return True
2614 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2615 verbose = self.op.verbose
2616 self._feedback_fn = feedback_fn
2618 vg_name = self.cfg.GetVGName()
2619 drbd_helper = self.cfg.GetDRBDHelper()
2620 cluster = self.cfg.GetClusterInfo()
2621 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2622 hypervisors = cluster.enabled_hypervisors
2623 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2625 i_non_redundant = [] # Non redundant instances
2626 i_non_a_balanced = [] # Non auto-balanced instances
2627 n_offline = 0 # Count of offline nodes
2628 n_drained = 0 # Count of nodes being drained
2629 node_vol_should = {}
2631 # FIXME: verify OS list
2634 filemap = _ComputeAncillaryFiles(cluster, False)
2636 # do local checksums
2637 master_node = self.master_node = self.cfg.GetMasterNode()
2638 master_ip = self.cfg.GetMasterIP()
2640 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2642 node_verify_param = {
2643 constants.NV_FILELIST:
2644 utils.UniqueSequence(filename
2645 for files in filemap
2646 for filename in files),
2647 constants.NV_NODELIST:
2648 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2649 self.all_node_info.values()),
2650 constants.NV_HYPERVISOR: hypervisors,
2651 constants.NV_HVPARAMS:
2652 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2653 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2654 for node in node_data_list
2655 if not node.offline],
2656 constants.NV_INSTANCELIST: hypervisors,
2657 constants.NV_VERSION: None,
2658 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2659 constants.NV_NODESETUP: None,
2660 constants.NV_TIME: None,
2661 constants.NV_MASTERIP: (master_node, master_ip),
2662 constants.NV_OSLIST: None,
2663 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2666 if vg_name is not None:
2667 node_verify_param[constants.NV_VGLIST] = None
2668 node_verify_param[constants.NV_LVLIST] = vg_name
2669 node_verify_param[constants.NV_PVLIST] = [vg_name]
2670 node_verify_param[constants.NV_DRBDLIST] = None
2673 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2676 # FIXME: this needs to be changed per node-group, not cluster-wide
2678 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2679 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2680 bridges.add(default_nicpp[constants.NIC_LINK])
2681 for instance in self.my_inst_info.values():
2682 for nic in instance.nics:
2683 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2684 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2685 bridges.add(full_nic[constants.NIC_LINK])
2688 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2690 # Build our expected cluster state
2691 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2693 vm_capable=node.vm_capable))
2694 for node in node_data_list)
2698 for node in self.all_node_info.values():
2699 path = _SupportsOob(self.cfg, node)
2700 if path and path not in oob_paths:
2701 oob_paths.append(path)
2704 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2706 for instance in self.my_inst_names:
2707 inst_config = self.my_inst_info[instance]
2709 for nname in inst_config.all_nodes:
2710 if nname not in node_image:
2711 gnode = self.NodeImage(name=nname)
2712 gnode.ghost = (nname not in self.all_node_info)
2713 node_image[nname] = gnode
2715 inst_config.MapLVsByNode(node_vol_should)
2717 pnode = inst_config.primary_node
2718 node_image[pnode].pinst.append(instance)
2720 for snode in inst_config.secondary_nodes:
2721 nimg = node_image[snode]
2722 nimg.sinst.append(instance)
2723 if pnode not in nimg.sbp:
2724 nimg.sbp[pnode] = []
2725 nimg.sbp[pnode].append(instance)
2727 # At this point, we have the in-memory data structures complete,
2728 # except for the runtime information, which we'll gather next
2730 # Due to the way our RPC system works, exact response times cannot be
2731 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2732 # time before and after executing the request, we can at least have a time
2734 nvinfo_starttime = time.time()
2735 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2737 self.cfg.GetClusterName())
2738 nvinfo_endtime = time.time()
2740 if self.extra_lv_nodes and vg_name is not None:
2742 self.rpc.call_node_verify(self.extra_lv_nodes,
2743 {constants.NV_LVLIST: vg_name},
2744 self.cfg.GetClusterName())
2746 extra_lv_nvinfo = {}
2748 all_drbd_map = self.cfg.ComputeDRBDMap()
2750 feedback_fn("* Gathering disk information (%s nodes)" %
2751 len(self.my_node_names))
2752 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2755 feedback_fn("* Verifying configuration file consistency")
2757 # If not all nodes are being checked, we need to make sure the master node
2758 # and a non-checked vm_capable node are in the list.
2759 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2761 vf_nvinfo = all_nvinfo.copy()
2762 vf_node_info = list(self.my_node_info.values())
2763 additional_nodes = []
2764 if master_node not in self.my_node_info:
2765 additional_nodes.append(master_node)
2766 vf_node_info.append(self.all_node_info[master_node])
2767 # Add the first vm_capable node we find which is not included
2768 for node in absent_nodes:
2769 nodeinfo = self.all_node_info[node]
2770 if nodeinfo.vm_capable and not nodeinfo.offline:
2771 additional_nodes.append(node)
2772 vf_node_info.append(self.all_node_info[node])
2774 key = constants.NV_FILELIST
2775 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2776 {key: node_verify_param[key]},
2777 self.cfg.GetClusterName()))
2779 vf_nvinfo = all_nvinfo
2780 vf_node_info = self.my_node_info.values()
2782 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2784 feedback_fn("* Verifying node status")
2786 refos_img = None
2788 for node_i in node_data_list:
2789 node = node_i.name
2790 nimg = node_image[node]
2792 if node_i.offline:
2793 if verbose:
2794 feedback_fn("* Skipping offline node %s" % (node,))
2795 n_offline += 1
2796 continue
2798 if node == master_node:
2799 ntype = "master"
2800 elif node_i.master_candidate:
2801 ntype = "master candidate"
2802 elif node_i.drained:
2803 ntype = "drained"
2804 n_drained += 1
2805 else:
2806 ntype = "regular"
2807 if verbose:
2808 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2810 msg = all_nvinfo[node].fail_msg
2811 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2812 if msg:
2813 nimg.rpc_fail = True
2814 continue
2816 nresult = all_nvinfo[node].payload
2818 nimg.call_ok = self._VerifyNode(node_i, nresult)
2819 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2820 self._VerifyNodeNetwork(node_i, nresult)
2821 self._VerifyOob(node_i, nresult)
2823 if nimg.vm_capable:
2824 self._VerifyNodeLVM(node_i, nresult, vg_name)
2825 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2826 all_drbd_map)
2828 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2829 self._UpdateNodeInstances(node_i, nresult, nimg)
2830 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2831 self._UpdateNodeOS(node_i, nresult, nimg)
2833 if not nimg.os_fail:
2834 if refos_img is None:
2835 refos_img = nimg
2836 self._VerifyNodeOS(node_i, nimg, refos_img)
2837 self._VerifyNodeBridges(node_i, nresult, bridges)
2839 # Check whether all running instances are primary for the node. (This
2840 # can no longer be done from _VerifyInstance below, since some of the
2841 # wrong instances could be from other node groups.)
2842 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2844 for inst in non_primary_inst:
2845 test = inst in self.all_inst_info
2846 _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2847 "instance should not run on node %s", node_i.name)
2848 _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2849 "node is running unknown instance %s", inst)
2851 for node, result in extra_lv_nvinfo.items():
2852 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2853 node_image[node], vg_name)
2855 feedback_fn("* Verifying instance status")
2856 for instance in self.my_inst_names:
2857 if verbose:
2858 feedback_fn("* Verifying instance %s" % instance)
2859 inst_config = self.my_inst_info[instance]
2860 self._VerifyInstance(instance, inst_config, node_image,
2861 instdisk[instance])
2862 inst_nodes_offline = []
2864 pnode = inst_config.primary_node
2865 pnode_img = node_image[pnode]
2866 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2867 self.ENODERPC, pnode, "instance %s, connection to"
2868 " primary node failed", instance)
2870 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2871 self.EINSTANCEBADNODE, instance,
2872 "instance is marked as running and lives on offline node %s",
2873 inst_config.primary_node)
2875 # If the instance is non-redundant we cannot survive losing its primary
2876 # node, so we are not N+1 compliant. On the other hand we have no disk
2877 # templates with more than one secondary so that situation is not well
2878 # supported either.
2879 # FIXME: does not support file-backed instances
2880 if not inst_config.secondary_nodes:
2881 i_non_redundant.append(instance)
2883 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2884 instance, "instance has multiple secondary nodes: %s",
2885 utils.CommaJoin(inst_config.secondary_nodes),
2886 code=self.ETYPE_WARNING)
2888 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2889 pnode = inst_config.primary_node
2890 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2891 instance_groups = {}
2893 for node in instance_nodes:
2894 instance_groups.setdefault(self.all_node_info[node].group,
2895 []).append(node)
2897 pretty_list = [
2898 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2899 # Sort so that we always list the primary node first.
2900 for group, nodes in sorted(instance_groups.items(),
2901 key=lambda (_, nodes): pnode in nodes,
2902 reverse=True)]
2904 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2905 instance, "instance has primary and secondary nodes in"
2906 " different groups: %s", utils.CommaJoin(pretty_list),
2907 code=self.ETYPE_WARNING)
2909 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2910 i_non_a_balanced.append(instance)
2912 for snode in inst_config.secondary_nodes:
2913 s_img = node_image[snode]
2914 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2915 "instance %s, connection to secondary node failed", instance)
2917 if s_img.offline:
2918 inst_nodes_offline.append(snode)
2920 # warn that the instance lives on offline nodes
2921 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2922 "instance has offline secondary node(s) %s",
2923 utils.CommaJoin(inst_nodes_offline))
2924 # ... or ghost/non-vm_capable nodes
2925 for node in inst_config.all_nodes:
2926 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2927 "instance lives on ghost node %s", node)
2928 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2929 instance, "instance lives on non-vm_capable node %s", node)
2931 feedback_fn("* Verifying orphan volumes")
2932 reserved = utils.FieldSet(*cluster.reserved_lvs)
2934 # We will get spurious "unknown volume" warnings if any node of this group
2935 # is secondary for an instance whose primary is in another group. To avoid
2936 # them, we find these instances and add their volumes to node_vol_should.
2937 for inst in self.all_inst_info.values():
2938 for secondary in inst.secondary_nodes:
2939 if (secondary in self.my_node_info
2940 and inst.name not in self.my_inst_info):
2941 inst.MapLVsByNode(node_vol_should)
2942 break
2944 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2946 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2947 feedback_fn("* Verifying N+1 Memory redundancy")
2948 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2950 feedback_fn("* Other Notes")
2951 if i_non_redundant:
2952 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2953 % len(i_non_redundant))
2955 if i_non_a_balanced:
2956 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2957 % len(i_non_a_balanced))
2959 if n_offline:
2960 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2962 if n_drained:
2963 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2965 return not self.bad
2967 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2968 """Analyze the post-hooks' result
2970 This method analyses the hook result, handles it, and sends some
2971 nicely-formatted feedback back to the user.
2973 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2974 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2975 @param hooks_results: the results of the multi-node hooks rpc call
2976 @param feedback_fn: function used to send feedback back to the caller
2977 @param lu_result: previous Exec result
2978 @return: the new Exec result, based on the previous result
2979 and hook results
2981 """
2982 # We only really run POST phase hooks, only for non-empty groups,
2983 # and are only interested in their results
2984 if not self.my_node_names:
2985 # empty node group
2986 return lu_result
2987 elif phase == constants.HOOKS_PHASE_POST:
2988 # Used to change hooks' output to proper indentation
2989 feedback_fn("* Hooks Results")
2990 assert hooks_results, "invalid result from hooks"
2992 for node_name in hooks_results:
2993 res = hooks_results[node_name]
2994 msg = res.fail_msg
2995 test = msg and not res.offline
2996 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2997 "Communication failure in hooks execution: %s", msg)
2998 if res.offline or msg:
2999 # No need to investigate payload if node is offline or gave
3000 # an error.
3001 continue
3002 for script, hkr, output in res.payload:
3003 test = hkr == constants.HKR_FAIL
3004 self._ErrorIf(test, self.ENODEHOOKS, node_name,
3005 "Script %s failed, output:", script)
3006 if test:
3007 output = self._HOOKS_INDENT_RE.sub(" ", output)
3008 feedback_fn("%s" % output)
3009 lu_result = False
3011 return lu_result
3014 class LUClusterVerifyDisks(NoHooksLU):
3015 """Verifies the cluster disks status.
3017 """
3019 REQ_BGL = False
3020 def ExpandNames(self):
3021 self.share_locks = _ShareAll()
3022 self.needed_locks = {
3023 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3024 }
3026 def Exec(self, feedback_fn):
3027 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3029 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3030 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3031 for group in group_names])
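# Editorial note (not part of the original module): the pattern above fans a
# cluster-wide request out into one job per node group; the processor then
# submits the jobs and returns their IDs to the caller. A minimal sketch
# with illustrative group names:
#
#   jobs = [[opcodes.OpGroupVerifyDisks(group_name=g)]
#           for g in ("default", "rack2")]
#   # return ResultWithJobs(jobs)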
3034 class LUGroupVerifyDisks(NoHooksLU):
3035 """Verifies the status of all disks in a node group.
3037 """
3039 REQ_BGL = False
3040 def ExpandNames(self):
3041 # Raises errors.OpPrereqError on its own if group can't be found
3042 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3044 self.share_locks = _ShareAll()
3045 self.needed_locks = {
3046 locking.LEVEL_INSTANCE: [],
3047 locking.LEVEL_NODEGROUP: [],
3048 locking.LEVEL_NODE: [],
3049 }
3051 def DeclareLocks(self, level):
3052 if level == locking.LEVEL_INSTANCE:
3053 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3055 # Lock instances optimistically, needs verification once node and group
3056 # locks have been acquired
3057 self.needed_locks[locking.LEVEL_INSTANCE] = \
3058 self.cfg.GetNodeGroupInstances(self.group_uuid)
3060 elif level == locking.LEVEL_NODEGROUP:
3061 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3063 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3064 set([self.group_uuid] +
3065 # Lock all groups used by instances optimistically; this requires
3066 # going via the node before it's locked, requiring verification
3067 # later on
3068 [group_uuid
3069 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3070 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3072 elif level == locking.LEVEL_NODE:
3073 # This will only lock the nodes in the group to be verified which contain
3074 # actual instances
3075 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3076 self._LockInstancesNodes()
3078 # Lock all nodes in group to be verified
3079 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3080 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3081 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3083 def CheckPrereq(self):
3084 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3085 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3086 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3088 assert self.group_uuid in owned_groups
3090 # Check if locked instances are still correct
3091 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3093 # Get instance information
3094 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3096 # Check if node groups for locked instances are still correct
3097 for (instance_name, inst) in self.instances.items():
3098 assert owned_nodes.issuperset(inst.all_nodes), \
3099 "Instance %s's nodes changed while we kept the lock" % instance_name
3101 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3102 owned_groups)
3104 assert self.group_uuid in inst_groups, \
3105 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3107 def Exec(self, feedback_fn):
3108 """Verify integrity of cluster disks.
3110 @rtype: tuple of three items
3111 @return: a tuple of (dict of node-to-node_error, list of instances
3112 which need activate-disks, dict of instance: (node, volume) for
3113 missing volumes
3115 """
3116 res_nodes = {}
3117 res_instances = set()
3118 res_missing = {}
3120 nv_dict = _MapInstanceDisksToNodes([inst
3121 for inst in self.instances.values()
3122 if inst.admin_up])
3125 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3126 set(self.cfg.GetVmCapableNodeList()))
3128 node_lvs = self.rpc.call_lv_list(nodes, [])
3130 for (node, node_res) in node_lvs.items():
3131 if node_res.offline:
3132 continue
3134 msg = node_res.fail_msg
3135 if msg:
3136 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3137 res_nodes[node] = msg
3138 continue
3140 for lv_name, (_, _, lv_online) in node_res.payload.items():
3141 inst = nv_dict.pop((node, lv_name), None)
3142 if not (lv_online or inst is None):
3143 res_instances.add(inst)
3145 # any leftover items in nv_dict are missing LVs, let's arrange the data
3146 # better
3147 for key, inst in nv_dict.iteritems():
3148 res_missing.setdefault(inst, []).append(key)
3150 return (res_nodes, list(res_instances), res_missing)
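# Editorial sketch (not part of the original module): nv_dict above is keyed
# by (node, lv_name) and entries are popped as LVs are reported, so whatever
# is left over is a missing volume. Illustrative data only:
#
#   nv_dict = {("node1", "xenvg/disk0"): "inst1"}
#   reported = {("node1", "xenvg/disk0"): True}  # lv_online flag
#   for key, online in reported.items():
#     inst = nv_dict.pop(key, None)
#     if inst is not None and not online:
#       pass  # instance needs activate-disks
#   missing = nv_dict  # never reported by any node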
3153 class LUClusterRepairDiskSizes(NoHooksLU):
3154 """Verifies the cluster disks sizes.
3156 """
3157 REQ_BGL = False
3159 def ExpandNames(self):
3160 if self.op.instances:
3161 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3162 self.needed_locks = {
3163 locking.LEVEL_NODE: [],
3164 locking.LEVEL_INSTANCE: self.wanted_names,
3165 }
3166 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3167 else:
3168 self.wanted_names = None
3169 self.needed_locks = {
3170 locking.LEVEL_NODE: locking.ALL_SET,
3171 locking.LEVEL_INSTANCE: locking.ALL_SET,
3172 }
3173 self.share_locks = _ShareAll()
3175 def DeclareLocks(self, level):
3176 if level == locking.LEVEL_NODE and self.wanted_names is not None:
3177 self._LockInstancesNodes(primary_only=True)
3179 def CheckPrereq(self):
3180 """Check prerequisites.
3182 This only checks the optional instance list against the existing names.
3184 """
3185 if self.wanted_names is None:
3186 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3188 self.wanted_instances = \
3189 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3191 def _EnsureChildSizes(self, disk):
3192 """Ensure children of the disk have the needed disk size.
3194 This is valid mainly for DRBD8 and fixes an issue where the
3195 children have smaller disk size.
3197 @param disk: an L{ganeti.objects.Disk} object
3199 """
3200 if disk.dev_type == constants.LD_DRBD8:
3201 assert disk.children, "Empty children for DRBD8?"
3202 fchild = disk.children[0]
3203 mismatch = fchild.size < disk.size
3204 if mismatch:
3205 self.LogInfo("Child disk has size %d, parent %d, fixing",
3206 fchild.size, disk.size)
3207 fchild.size = disk.size
3209 # and we recurse on this child only, not on the metadev
3210 return self._EnsureChildSizes(fchild) or mismatch
3211 else:
3212 return False
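# Editorial note (not part of the original module): the recursion above only
# follows the data child (children[0]); the DRBD metadata device is left
# alone on purpose. E.g. for a DRBD8 disk of size 1024 whose LV child is
# 1000, the child is grown to 1024 and True is returned so the caller knows
# the configuration changed. Hypothetical usage:
#
#   # disk.size == 1024, disk.children[0].size == 1000
#   changed = lu._EnsureChildSizes(disk)  # -> True, child now 1024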
3214 def Exec(self, feedback_fn):
3215 """Verify the size of cluster disks.
3217 """
3218 # TODO: check child disks too
3219 # TODO: check differences in size between primary/secondary nodes
3220 per_node_disks = {}
3221 for instance in self.wanted_instances:
3222 pnode = instance.primary_node
3223 if pnode not in per_node_disks:
3224 per_node_disks[pnode] = []
3225 for idx, disk in enumerate(instance.disks):
3226 per_node_disks[pnode].append((instance, idx, disk))
3228 changed = []
3229 for node, dskl in per_node_disks.items():
3230 newl = [v[2].Copy() for v in dskl]
3231 for dsk in newl:
3232 self.cfg.SetDiskID(dsk, node)
3233 result = self.rpc.call_blockdev_getsize(node, newl)
3234 if result.fail_msg:
3235 self.LogWarning("Failure in blockdev_getsize call to node"
3236 " %s, ignoring", node)
3237 continue
3238 if len(result.payload) != len(dskl):
3239 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3240 " result.payload=%s", node, len(dskl), result.payload)
3241 self.LogWarning("Invalid result from node %s, ignoring node results",
3242 node)
3243 continue
3244 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3245 if size is None:
3246 self.LogWarning("Disk %d of instance %s did not return size"
3247 " information, ignoring", idx, instance.name)
3248 continue
3249 if not isinstance(size, (int, long)):
3250 self.LogWarning("Disk %d of instance %s did not return valid"
3251 " size information, ignoring", idx, instance.name)
3252 continue
3254 if size != disk.size:
3255 self.LogInfo("Disk %d of instance %s has mismatched size,"
3256 " correcting: recorded %d, actual %d", idx,
3257 instance.name, disk.size, size)
3258 disk.size = size
3259 self.cfg.Update(instance, feedback_fn)
3260 changed.append((instance.name, idx, size))
3261 if self._EnsureChildSizes(disk):
3262 self.cfg.Update(instance, feedback_fn)
3263 changed.append((instance.name, idx, disk.size))
3265 return changed
3267 class LUClusterRename(LogicalUnit):
3268 """Rename the cluster.
3270 """
3271 HPATH = "cluster-rename"
3272 HTYPE = constants.HTYPE_CLUSTER
3274 def BuildHooksEnv(self):
3275 """Build hooks env.
3277 """
3278 return {
3279 "OP_TARGET": self.cfg.GetClusterName(),
3280 "NEW_NAME": self.op.name,
3281 }
3283 def BuildHooksNodes(self):
3284 """Build hooks nodes.
3286 """
3287 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3289 def CheckPrereq(self):
3290 """Verify that the passed name is a valid one.
3292 """
3293 hostname = netutils.GetHostname(name=self.op.name,
3294 family=self.cfg.GetPrimaryIPFamily())
3296 new_name = hostname.name
3297 self.ip = new_ip = hostname.ip
3298 old_name = self.cfg.GetClusterName()
3299 old_ip = self.cfg.GetMasterIP()
3300 if new_name == old_name and new_ip == old_ip:
3301 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3302 " cluster has changed",
3303 errors.ECODE_INVAL)
3304 if new_ip != old_ip:
3305 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3306 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3307 " reachable on the network" %
3308 new_ip, errors.ECODE_NOTUNIQUE)
3310 self.op.name = new_name
3312 def Exec(self, feedback_fn):
3313 """Rename the cluster.
3315 """
3316 clustername = self.op.name
3317 ip = self.ip
3319 # shutdown the master IP
3320 master = self.cfg.GetMasterNode()
3321 result = self.rpc.call_node_stop_master(master, False)
3322 result.Raise("Could not disable the master role")
3324 try:
3325 cluster = self.cfg.GetClusterInfo()
3326 cluster.cluster_name = clustername
3327 cluster.master_ip = ip
3328 self.cfg.Update(cluster, feedback_fn)
3330 # update the known hosts file
3331 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3332 node_list = self.cfg.GetOnlineNodeList()
3333 try:
3334 node_list.remove(master)
3335 except ValueError:
3336 pass
3337 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3338 finally:
3339 result = self.rpc.call_node_start_master(master, False, False)
3340 msg = result.fail_msg
3341 if msg:
3342 self.LogWarning("Could not re-enable the master role on"
3343 " the master, please restart manually: %s", msg)
3345 return clustername
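# Editorial note (not part of the original module): the rename follows a
# stop/update/restart bracket: the master IP is taken down first, the new
# name and IP are committed to the configuration, and the master role is
# restored in a finally clause even if distributing the known-hosts file
# fails. A sketch of the bracket only, with illustrative stand-ins:
#
#   stop_master_ip()
#   try:
#     update_config(new_name, new_ip)
#   finally:
#     start_master_ip()  # best effort; warn on failure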
3348 class LUClusterSetParams(LogicalUnit):
3349 """Change the parameters of the cluster.
3351 """
3352 HPATH = "cluster-modify"
3353 HTYPE = constants.HTYPE_CLUSTER
3356 def CheckArguments(self):
3357 """Check parameters
3359 """
3360 if self.op.uid_pool:
3361 uidpool.CheckUidPool(self.op.uid_pool)
3363 if self.op.add_uids:
3364 uidpool.CheckUidPool(self.op.add_uids)
3366 if self.op.remove_uids:
3367 uidpool.CheckUidPool(self.op.remove_uids)
3369 def ExpandNames(self):
3370 # FIXME: in the future maybe other cluster params won't require checking on
3371 # all nodes to be modified.
3372 self.needed_locks = {
3373 locking.LEVEL_NODE: locking.ALL_SET,
3375 self.share_locks[locking.LEVEL_NODE] = 1
3377 def BuildHooksEnv(self):
3378 """Build hooks env.
3380 """
3381 return {
3382 "OP_TARGET": self.cfg.GetClusterName(),
3383 "NEW_VG_NAME": self.op.vg_name,
3384 }
3386 def BuildHooksNodes(self):
3387 """Build hooks nodes.
3389 """
3390 mn = self.cfg.GetMasterNode()
3391 return ([mn], [mn])
3393 def CheckPrereq(self):
3394 """Check prerequisites.
3396 This checks whether the given params don't conflict and
3397 if the given volume group is valid.
3399 """
3400 if self.op.vg_name is not None and not self.op.vg_name:
3401 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3402 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3403 " instances exist", errors.ECODE_INVAL)
3405 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3406 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3407 raise errors.OpPrereqError("Cannot disable drbd helper while"
3408 " drbd-based instances exist",
3409 errors.ECODE_INVAL)
3411 node_list = self.owned_locks(locking.LEVEL_NODE)
3413 # if vg_name not None, checks given volume group on all nodes
3414 if self.op.vg_name:
3415 vglist = self.rpc.call_vg_list(node_list)
3416 for node in node_list:
3417 msg = vglist[node].fail_msg
3418 if msg:
3419 # ignoring down node
3420 self.LogWarning("Error while gathering data on node %s"
3421 " (ignoring node): %s", node, msg)
3422 continue
3423 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3424 self.op.vg_name,
3425 constants.MIN_VG_SIZE)
3426 if vgstatus:
3427 raise errors.OpPrereqError("Error on node '%s': %s" %
3428 (node, vgstatus), errors.ECODE_ENVIRON)
3430 if self.op.drbd_helper:
3431 # checks given drbd helper on all nodes
3432 helpers = self.rpc.call_drbd_helper(node_list)
3433 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3434 if ninfo.offline:
3435 self.LogInfo("Not checking drbd helper on offline node %s", node)
3436 continue
3437 msg = helpers[node].fail_msg
3438 if msg:
3439 raise errors.OpPrereqError("Error checking drbd helper on node"
3440 " '%s': %s" % (node, msg),
3441 errors.ECODE_ENVIRON)
3442 node_helper = helpers[node].payload
3443 if node_helper != self.op.drbd_helper:
3444 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3445 (node, node_helper), errors.ECODE_ENVIRON)
3447 self.cluster = cluster = self.cfg.GetClusterInfo()
3448 # validate params changes
3449 if self.op.beparams:
3450 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3451 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3453 if self.op.ndparams:
3454 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3455 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3457 # TODO: we need a more general way to handle resetting
3458 # cluster-level parameters to default values
3459 if self.new_ndparams["oob_program"] == "":
3460 self.new_ndparams["oob_program"] = \
3461 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3463 if self.op.nicparams:
3464 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3465 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3466 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3468 nic_errors = []
3469 # check all instances for consistency
3470 for instance in self.cfg.GetAllInstancesInfo().values():
3471 for nic_idx, nic in enumerate(instance.nics):
3472 params_copy = copy.deepcopy(nic.nicparams)
3473 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3475 # check parameter syntax
3477 objects.NIC.CheckParameterSyntax(params_filled)
3478 except errors.ConfigurationError, err:
3479 nic_errors.append("Instance %s, nic/%d: %s" %
3480 (instance.name, nic_idx, err))
3482 # if we're moving instances to routed, check that they have an ip
3483 target_mode = params_filled[constants.NIC_MODE]
3484 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3485 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3486 " address" % (instance.name, nic_idx))
3487 if nic_errors:
3488 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3489 "\n".join(nic_errors))
3491 # hypervisor list/parameters
3492 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3493 if self.op.hvparams:
3494 for hv_name, hv_dict in self.op.hvparams.items():
3495 if hv_name not in self.new_hvparams:
3496 self.new_hvparams[hv_name] = hv_dict
3498 self.new_hvparams[hv_name].update(hv_dict)
3500 # os hypervisor parameters
3501 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3502 if self.op.os_hvp:
3503 for os_name, hvs in self.op.os_hvp.items():
3504 if os_name not in self.new_os_hvp:
3505 self.new_os_hvp[os_name] = hvs
3506 else:
3507 for hv_name, hv_dict in hvs.items():
3508 if hv_name not in self.new_os_hvp[os_name]:
3509 self.new_os_hvp[os_name][hv_name] = hv_dict
3510 else:
3511 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3514 self.new_osp = objects.FillDict(cluster.osparams, {})
3515 if self.op.osparams:
3516 for os_name, osp in self.op.osparams.items():
3517 if os_name not in self.new_osp:
3518 self.new_osp[os_name] = {}
3519 else:
3520 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3521 use_none=True)
3523 if not self.new_osp[os_name]:
3524 # we removed all parameters
3525 del self.new_osp[os_name]
3526 else:
3527 # check the parameter validity (remote check)
3528 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3529 os_name, self.new_osp[os_name])
3531 # changes to the hypervisor list
3532 if self.op.enabled_hypervisors is not None:
3533 self.hv_list = self.op.enabled_hypervisors
3534 for hv in self.hv_list:
3535 # if the hypervisor doesn't already exist in the cluster
3536 # hvparams, we initialize it to empty, and then (in both
3537 # cases) we make sure to fill the defaults, as we might not
3538 # have a complete defaults list if the hypervisor wasn't
3539 # enabled before
3540 if hv not in new_hvp:
3541 new_hvp[hv] = {}
3542 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3543 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3544 else:
3545 self.hv_list = cluster.enabled_hypervisors
3547 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3548 # either the enabled list has changed, or the parameters have, validate
3549 for hv_name, hv_params in self.new_hvparams.items():
3550 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3551 (self.op.enabled_hypervisors and
3552 hv_name in self.op.enabled_hypervisors)):
3553 # either this is a new hypervisor, or its parameters have changed
3554 hv_class = hypervisor.GetHypervisor(hv_name)
3555 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3556 hv_class.CheckParameterSyntax(hv_params)
3557 _CheckHVParams(self, node_list, hv_name, hv_params)
3559 if self.op.os_hvp:
3560 # no need to check any newly-enabled hypervisors, since the
3561 # defaults have already been checked in the above code-block
3562 for os_name, os_hvp in self.new_os_hvp.items():
3563 for hv_name, hv_params in os_hvp.items():
3564 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3565 # we need to fill in the new os_hvp on top of the actual hv_p
3566 cluster_defaults = self.new_hvparams.get(hv_name, {})
3567 new_osp = objects.FillDict(cluster_defaults, hv_params)
3568 hv_class = hypervisor.GetHypervisor(hv_name)
3569 hv_class.CheckParameterSyntax(new_osp)
3570 _CheckHVParams(self, node_list, hv_name, new_osp)
3572 if self.op.default_iallocator:
3573 alloc_script = utils.FindFile(self.op.default_iallocator,
3574 constants.IALLOCATOR_SEARCH_PATH,
3575 os.path.isfile)
3576 if alloc_script is None:
3577 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3578 " specified" % self.op.default_iallocator,
3579 errors.ECODE_INVAL)
3581 def Exec(self, feedback_fn):
3582 """Change the parameters of the cluster.
3584 """
3585 if self.op.vg_name is not None:
3586 new_volume = self.op.vg_name
3587 if not new_volume:
3588 new_volume = None
3589 if new_volume != self.cfg.GetVGName():
3590 self.cfg.SetVGName(new_volume)
3591 else:
3592 feedback_fn("Cluster LVM configuration already in desired"
3593 " state, not changing")
3594 if self.op.drbd_helper is not None:
3595 new_helper = self.op.drbd_helper
3596 if not new_helper:
3597 new_helper = None
3598 if new_helper != self.cfg.GetDRBDHelper():
3599 self.cfg.SetDRBDHelper(new_helper)
3600 else:
3601 feedback_fn("Cluster DRBD helper already in desired state,"
3602 " not changing")
3603 if self.op.hvparams:
3604 self.cluster.hvparams = self.new_hvparams
3605 if self.op.os_hvp:
3606 self.cluster.os_hvp = self.new_os_hvp
3607 if self.op.enabled_hypervisors is not None:
3608 self.cluster.hvparams = self.new_hvparams
3609 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3610 if self.op.beparams:
3611 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3612 if self.op.nicparams:
3613 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3614 if self.op.osparams:
3615 self.cluster.osparams = self.new_osp
3616 if self.op.ndparams:
3617 self.cluster.ndparams = self.new_ndparams
3619 if self.op.candidate_pool_size is not None:
3620 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3621 # we need to update the pool size here, otherwise the save will fail
3622 _AdjustCandidatePool(self, [])
3624 if self.op.maintain_node_health is not None:
3625 self.cluster.maintain_node_health = self.op.maintain_node_health
3627 if self.op.prealloc_wipe_disks is not None:
3628 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3630 if self.op.add_uids is not None:
3631 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3633 if self.op.remove_uids is not None:
3634 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3636 if self.op.uid_pool is not None:
3637 self.cluster.uid_pool = self.op.uid_pool
3639 if self.op.default_iallocator is not None:
3640 self.cluster.default_iallocator = self.op.default_iallocator
3642 if self.op.reserved_lvs is not None:
3643 self.cluster.reserved_lvs = self.op.reserved_lvs
3645 def helper_os(aname, mods, desc):
3646 desc += " OS list"
3647 lst = getattr(self.cluster, aname)
3648 for key, val in mods:
3649 if key == constants.DDM_ADD:
3650 if val in lst:
3651 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3652 else:
3653 lst.append(val)
3654 elif key == constants.DDM_REMOVE:
3655 if val in lst:
3656 lst.remove(val)
3657 else:
3658 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3659 else:
3660 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3662 if self.op.hidden_os:
3663 helper_os("hidden_os", self.op.hidden_os, "hidden")
3665 if self.op.blacklisted_os:
3666 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3668 if self.op.master_netdev:
3669 master = self.cfg.GetMasterNode()
3670 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3671 self.cluster.master_netdev)
3672 result = self.rpc.call_node_stop_master(master, False)
3673 result.Raise("Could not disable the master ip")
3674 feedback_fn("Changing master_netdev from %s to %s" %
3675 (self.cluster.master_netdev, self.op.master_netdev))
3676 self.cluster.master_netdev = self.op.master_netdev
3678 self.cfg.Update(self.cluster, feedback_fn)
3680 if self.op.master_netdev:
3681 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3682 self.op.master_netdev)
3683 result = self.rpc.call_node_start_master(master, False, False)
3684 if result.fail_msg:
3685 self.LogWarning("Could not re-enable the master ip on"
3686 " the master, please restart manually: %s",
3687 result.fail_msg)
3690 def _UploadHelper(lu, nodes, fname):
3691 """Helper for uploading a file and showing warnings.
3693 """
3694 if os.path.exists(fname):
3695 result = lu.rpc.call_upload_file(nodes, fname)
3696 for to_node, to_result in result.items():
3697 msg = to_result.fail_msg
3698 if msg:
3699 msg = ("Copy of file %s to node %s failed: %s" %
3700 (fname, to_node, msg))
3701 lu.proc.LogWarning(msg)
3704 def _ComputeAncillaryFiles(cluster, redist):
3705 """Compute files external to Ganeti which need to be consistent.
3707 @type redist: boolean
3708 @param redist: Whether to include files which need to be redistributed
3710 """
3711 # Compute files for all nodes
3712 files_all = set([
3713 constants.SSH_KNOWN_HOSTS_FILE,
3714 constants.CONFD_HMAC_KEY,
3715 constants.CLUSTER_DOMAIN_SECRET_FILE,
3716 ])
3718 if not redist:
3719 files_all.update(constants.ALL_CERT_FILES)
3720 files_all.update(ssconf.SimpleStore().GetFileList())
3721 else:
3722 # we need to ship at least the RAPI certificate
3723 files_all.add(constants.RAPI_CERT_FILE)
3725 if cluster.modify_etc_hosts:
3726 files_all.add(constants.ETC_HOSTS)
3728 # Files which must either exist on all nodes or on none
3729 files_all_opt = set([
3730 constants.RAPI_USERS_FILE,
3731 ])
3733 # Files which should only be on master candidates
3734 files_mc = set()
3735 if not redist:
3736 files_mc.add(constants.CLUSTER_CONF_FILE)
3738 # Files which should only be on VM-capable nodes
3739 files_vm = set(filename
3740 for hv_name in cluster.enabled_hypervisors
3741 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3743 # Filenames must be unique
3744 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3745 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3746 "Found file listed in more than one file list"
3748 return (files_all, files_all_opt, files_mc, files_vm)
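# Illustrative sketch (not part of the original module): in the redistribution
# case the RAPI certificate is always shipped and the master-candidate-only
# set stays empty, e.g.:
#
#   (files_all, files_all_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(cluster, True)
#   assert constants.RAPI_CERT_FILE in files_all
#   assert not files_mc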
3751 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3752 """Distribute additional files which are part of the cluster configuration.
3754 ConfigWriter takes care of distributing the config and ssconf files, but
3755 there are more files which should be distributed to all nodes. This function
3756 makes sure those are copied.
3758 @param lu: calling logical unit
3759 @param additional_nodes: list of nodes not in the config to distribute to
3760 @type additional_vm: boolean
3761 @param additional_vm: whether the additional nodes are vm-capable or not
3764 # Gather target nodes
3765 cluster = lu.cfg.GetClusterInfo()
3766 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3768 online_nodes = lu.cfg.GetOnlineNodeList()
3769 vm_nodes = lu.cfg.GetVmCapableNodeList()
3771 if additional_nodes is not None:
3772 online_nodes.extend(additional_nodes)
3773 if additional_vm:
3774 vm_nodes.extend(additional_nodes)
3776 # Never distribute to master node
3777 for nodelist in [online_nodes, vm_nodes]:
3778 if master_info.name in nodelist:
3779 nodelist.remove(master_info.name)
3782 (files_all, files_all_opt, files_mc, files_vm) = \
3783 _ComputeAncillaryFiles(cluster, True)
3785 # Never re-distribute configuration file from here
3786 assert not (constants.CLUSTER_CONF_FILE in files_all or
3787 constants.CLUSTER_CONF_FILE in files_vm)
3788 assert not files_mc, "Master candidates not handled in this function"
3790 filemap = [
3791 (online_nodes, files_all),
3792 (online_nodes, files_all_opt),
3793 (vm_nodes, files_vm),
3794 ]
3796 # Upload the files
3797 for (node_list, files) in filemap:
3798 for fname in files:
3799 _UploadHelper(lu, node_list, fname)
3802 class LUClusterRedistConf(NoHooksLU):
3803 """Force the redistribution of cluster configuration.
3805 This is a very simple LU.
3810 def ExpandNames(self):
3811 self.needed_locks = {
3812 locking.LEVEL_NODE: locking.ALL_SET,
3813 }
3814 self.share_locks[locking.LEVEL_NODE] = 1
3816 def Exec(self, feedback_fn):
3817 """Redistribute the configuration.
3820 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3821 _RedistributeAncillaryFiles(self)
3824 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3825 """Sleep and poll for an instance's disk to sync.
3828 if not instance.disks or disks is not None and not disks:
3829 return True
3831 disks = _ExpandCheckDisks(instance, disks)
3834 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3836 node = instance.primary_node
3838 for dev in disks:
3839 lu.cfg.SetDiskID(dev, node)
3841 # TODO: Convert to utils.Retry
3843 retries = 0
3844 degr_retries = 10 # in seconds, as we sleep 1 second each time
3845 while True:
3846 max_time = 0
3847 done = True
3848 cumul_degraded = False
3849 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3850 msg = rstats.fail_msg
3851 if msg:
3852 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3853 retries += 1
3854 if retries >= 10:
3855 raise errors.RemoteError("Can't contact node %s for mirror data,"
3856 " aborting." % node)
3857 time.sleep(6)
3858 continue
3859 rstats = rstats.payload
3860 retries = 0
3861 for i, mstat in enumerate(rstats):
3862 if mstat is None:
3863 lu.LogWarning("Can't compute data for node %s/%s",
3864 node, disks[i].iv_name)
3865 continue
3867 cumul_degraded = (cumul_degraded or
3868 (mstat.is_degraded and mstat.sync_percent is None))
3869 if mstat.sync_percent is not None:
3870 done = False
3871 if mstat.estimated_time is not None:
3872 rem_time = ("%s remaining (estimated)" %
3873 utils.FormatSeconds(mstat.estimated_time))
3874 max_time = mstat.estimated_time
3875 else:
3876 rem_time = "no time estimate"
3877 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3878 (disks[i].iv_name, mstat.sync_percent, rem_time))
3880 # if we're done but degraded, let's do a few small retries, to
3881 # make sure we see a stable and not transient situation; therefore
3882 # we force restart of the loop
3883 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3884 logging.info("Degraded disks found, %d retries left", degr_retries)
3892 time.sleep(min(60, max_time))
3895 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3896 return not cumul_degraded
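# Illustrative sketch (not part of the original module): a typical caller
# treats a False result as "still degraded" and aborts:
#
#   if not _WaitForSync(lu, instance):
#     raise errors.OpExecError("Instance disks never reached a clean state")
#
# With oneshot=True only a single status pass is made (plus the small
# degraded-retry loop) instead of waiting for sync_percent to disappear.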
3899 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3900 """Check that mirrors are not degraded.
3902 The ldisk parameter, if True, will change the test from the
3903 is_degraded attribute (which represents overall non-ok status for
3904 the device(s)) to the ldisk (representing the local storage status).
3907 lu.cfg.SetDiskID(dev, node)
3909 result = True
3911 if on_primary or dev.AssembleOnSecondary():
3912 rstats = lu.rpc.call_blockdev_find(node, dev)
3913 msg = rstats.fail_msg
3914 if msg:
3915 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3916 result = False
3917 elif not rstats.payload:
3918 lu.LogWarning("Can't find disk on node %s", node)
3919 result = False
3920 else:
3921 if ldisk:
3922 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3923 else:
3924 result = result and not rstats.payload.is_degraded
3926 if dev.children:
3927 for child in dev.children:
3928 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3930 return result
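# Illustrative sketch (not part of the original module): checking the local
# storage status of a disk on its primary node; ldisk=True tests LDS_OKAY
# instead of the overall is_degraded flag. "disk" and "pri_node" are
# hypothetical names:
#
#   if not _CheckDiskConsistency(lu, disk, pri_node, True, ldisk=True):
#     raise errors.OpExecError("Disk %s is degraded" % disk.iv_name)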
3933 class LUOobCommand(NoHooksLU):
3934 """Logical unit for OOB handling.
3938 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3940 def ExpandNames(self):
3941 """Gather locks we need.
3944 if self.op.node_names:
3945 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3946 lock_names = self.op.node_names
3947 else:
3948 lock_names = locking.ALL_SET
3950 self.needed_locks = {
3951 locking.LEVEL_NODE: lock_names,
3952 }
3954 def CheckPrereq(self):
3955 """Check prerequisites.
3957 This checks that:
3958 - the node exists in the configuration
3961 Any errors are signaled by raising errors.OpPrereqError.
3964 self.nodes = []
3965 self.master_node = self.cfg.GetMasterNode()
3967 assert self.op.power_delay >= 0.0
3969 if self.op.node_names:
3970 if (self.op.command in self._SKIP_MASTER and
3971 self.master_node in self.op.node_names):
3972 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3973 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3975 if master_oob_handler:
3976 additional_text = ("run '%s %s %s' if you want to operate on the"
3977 " master regardless") % (master_oob_handler,
3981 additional_text = "it does not support out-of-band operations"
3983 raise errors.OpPrereqError(("Operating on the master node %s is not"
3984 " allowed for %s; %s") %
3985 (self.master_node, self.op.command,
3986 additional_text), errors.ECODE_INVAL)
3987 else:
3988 self.op.node_names = self.cfg.GetNodeList()
3989 if self.op.command in self._SKIP_MASTER:
3990 self.op.node_names.remove(self.master_node)
3992 if self.op.command in self._SKIP_MASTER:
3993 assert self.master_node not in self.op.node_names
3995 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
3997 raise errors.OpPrereqError("Node %s not found" % node_name,
4000 self.nodes.append(node)
4002 if (not self.op.ignore_status and
4003 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4004 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4005 " not marked offline") % node_name,
4008 def Exec(self, feedback_fn):
4009 """Execute OOB and return result if we expect any.
4012 master_node = self.master_node
4013 ret = []
4015 for idx, node in enumerate(utils.NiceSort(self.nodes,
4016 key=lambda node: node.name)):
4017 node_entry = [(constants.RS_NORMAL, node.name)]
4018 ret.append(node_entry)
4020 oob_program = _SupportsOob(self.cfg, node)
4022 if not oob_program:
4023 node_entry.append((constants.RS_UNAVAIL, None))
4024 continue
4026 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4027 self.op.command, oob_program, node.name)
4028 result = self.rpc.call_run_oob(master_node, oob_program,
4029 self.op.command, node.name,
4033 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4034 node.name, result.fail_msg)
4035 node_entry.append((constants.RS_NODATA, None))
4036 else:
4037 try:
4038 self._CheckPayload(result)
4039 except errors.OpExecError, err:
4040 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4041 node.name, err)
4042 node_entry.append((constants.RS_NODATA, None))
4043 else:
4044 if self.op.command == constants.OOB_HEALTH:
4045 # For health we should log important events
4046 for item, status in result.payload:
4047 if status in [constants.OOB_STATUS_WARNING,
4048 constants.OOB_STATUS_CRITICAL]:
4049 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4050 item, node.name, status)
4052 if self.op.command == constants.OOB_POWER_ON:
4053 node.powered = True
4054 elif self.op.command == constants.OOB_POWER_OFF:
4055 node.powered = False
4056 elif self.op.command == constants.OOB_POWER_STATUS:
4057 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4058 if powered != node.powered:
4059 logging.warning(("Recorded power state (%s) of node '%s' does not"
4060 " match actual power state (%s)"), node.powered,
4063 # For configuration changing commands we should update the node
4064 if self.op.command in (constants.OOB_POWER_ON,
4065 constants.OOB_POWER_OFF):
4066 self.cfg.Update(node, feedback_fn)
4068 node_entry.append((constants.RS_NORMAL, result.payload))
4070 if (self.op.command == constants.OOB_POWER_ON and
4071 idx < len(self.nodes) - 1):
4072 time.sleep(self.op.power_delay)
4074 return ret
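# Illustrative sketch (not part of the original module): Exec returns a list
# with one entry per node, each entry starting with the node name, e.g. for a
# power-status command:
#
#   [[(constants.RS_NORMAL, "node1"),
#     (constants.RS_NORMAL, {constants.OOB_POWER_STATUS_POWERED: True})],
#    [(constants.RS_NORMAL, "node2"), (constants.RS_UNAVAIL, None)]]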
4076 def _CheckPayload(self, result):
4077 """Checks if the payload is valid.
4079 @param result: RPC result
4080 @raises errors.OpExecError: If payload is not valid
4082 """
4083 errs = []
4084 if self.op.command == constants.OOB_HEALTH:
4085 if not isinstance(result.payload, list):
4086 errs.append("command 'health' is expected to return a list but got %s" %
4087 type(result.payload))
4088 else:
4089 for item, status in result.payload:
4090 if status not in constants.OOB_STATUSES:
4091 errs.append("health item '%s' has invalid status '%s'" %
4092 (item, status))
4094 if self.op.command == constants.OOB_POWER_STATUS:
4095 if not isinstance(result.payload, dict):
4096 errs.append("power-status is expected to return a dict but got %s" %
4097 type(result.payload))
4099 if self.op.command in [
4100 constants.OOB_POWER_ON,
4101 constants.OOB_POWER_OFF,
4102 constants.OOB_POWER_CYCLE,
4103 ]:
4104 if result.payload is not None:
4105 errs.append("%s is expected to not return payload but got '%s'" %
4106 (self.op.command, result.payload))
4109 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4110 utils.CommaJoin(errs))
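# Illustrative sketch (not part of the original module): payload shapes
# accepted by _CheckPayload for each command:
#
#   OOB_HEALTH             -> [["disk0", "OK"], ["psu1", "WARNING"]]
#   OOB_POWER_STATUS       -> {constants.OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON/OFF/CYCLE -> None (no payload expected)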
4113 class _OsQuery(_QueryBase):
4114 FIELDS = query.OS_FIELDS
4116 def ExpandNames(self, lu):
4117 # Lock all nodes in shared mode
4118 # Temporary removal of locks, should be reverted later
4119 # TODO: reintroduce locks when they are lighter-weight
4120 lu.needed_locks = {}
4121 #self.share_locks[locking.LEVEL_NODE] = 1
4122 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4124 # The following variables interact with _QueryBase._GetNames
4125 if self.names:
4126 self.wanted = self.names
4127 else:
4128 self.wanted = locking.ALL_SET
4130 self.do_locking = self.use_locking
4132 def DeclareLocks(self, lu, level):
4133 pass
4135 @staticmethod
4136 def _DiagnoseByOS(rlist):
4137 """Remaps a per-node return list into a per-os per-node dictionary
4139 @param rlist: a map with node names as keys and OS objects as values
4142 @return: a dictionary with osnames as keys and as value another
4143 map, with nodes as keys and tuples of (path, status, diagnose,
4144 variants, parameters, api_versions) as values, eg::
4146 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4147 (/srv/..., False, "invalid api")],
4148 "node2": [(/srv/..., True, "", [], [])]}
4151 all_os = {}
4153 # we build here the list of nodes that didn't fail the RPC (at RPC
4154 # level), so that nodes with a non-responding node daemon don't
4155 # make all OSes invalid
4156 good_nodes = [node_name for node_name in rlist
4157 if not rlist[node_name].fail_msg]
4158 for node_name, nr in rlist.items():
4159 if nr.fail_msg or not nr.payload:
4160 continue
4161 for (name, path, status, diagnose, variants,
4162 params, api_versions) in nr.payload:
4163 if name not in all_os:
4164 # build a list of nodes for this os containing empty lists
4165 # for each node in node_list
4166 all_os[name] = {}
4167 for nname in good_nodes:
4168 all_os[name][nname] = []
4169 # convert params from [name, help] to (name, help)
4170 params = [tuple(v) for v in params]
4171 all_os[name][node_name].append((path, status, diagnose,
4172 variants, params, api_versions))
4173 return all_os
4175 def _GetQueryData(self, lu):
4176 """Computes the list of nodes and their attributes.
4179 # Locking is not used
4180 assert not (compat.any(lu.glm.is_owned(level)
4181 for level in locking.LEVELS
4182 if level != locking.LEVEL_CLUSTER) or
4183 self.do_locking or self.use_locking)
4185 valid_nodes = [node.name
4186 for node in lu.cfg.GetAllNodesInfo().values()
4187 if not node.offline and node.vm_capable]
4188 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4189 cluster = lu.cfg.GetClusterInfo()
4191 data = {}
4193 for (os_name, os_data) in pol.items():
4194 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4195 hidden=(os_name in cluster.hidden_os),
4196 blacklisted=(os_name in cluster.blacklisted_os))
4198 variants = set()
4199 parameters = set()
4200 api_versions = set()
4202 for idx, osl in enumerate(os_data.values()):
4203 info.valid = bool(info.valid and osl and osl[0][1])
4204 if not info.valid:
4205 break
4207 (node_variants, node_params, node_api) = osl[0][3:6]
4208 if idx == 0:
4209 # First entry
4210 variants.update(node_variants)
4211 parameters.update(node_params)
4212 api_versions.update(node_api)
4213 else:
4214 # Filter out inconsistent values
4215 variants.intersection_update(node_variants)
4216 parameters.intersection_update(node_params)
4217 api_versions.intersection_update(node_api)
4219 info.variants = list(variants)
4220 info.parameters = list(parameters)
4221 info.api_versions = list(api_versions)
4223 data[os_name] = info
4225 # Prepare data in requested order
4226 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4227 if name in data]
4230 class LUOsDiagnose(NoHooksLU):
4231 """Logical unit for OS diagnose/query.
4236 @staticmethod
4237 def _BuildFilter(fields, names):
4238 """Builds a filter for querying OSes.
4241 name_filter = qlang.MakeSimpleFilter("name", names)
4243 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4244 # respective field is not requested
4245 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4246 for fname in ["hidden", "blacklisted"]
4247 if fname not in fields]
4248 if "valid" not in fields:
4249 status_filter.append([qlang.OP_TRUE, "valid"])
4251 if status_filter:
4252 status_filter.insert(0, qlang.OP_AND)
4253 else:
4254 status_filter = None
4256 if name_filter and status_filter:
4257 return [qlang.OP_AND, name_filter, status_filter]
4258 elif name_filter:
4259 return name_filter
4260 else:
4261 return status_filter
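# Illustrative sketch (not part of the original module): for a plain
# name-only query ("gnt-os list" style) no name filter is built, so only the
# legacy status filter is returned, roughly:
#
#   [qlang.OP_AND,
#    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]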
4263 def CheckArguments(self):
4264 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4265 self.op.output_fields, False)
4267 def ExpandNames(self):
4268 self.oq.ExpandNames(self)
4270 def Exec(self, feedback_fn):
4271 return self.oq.OldStyleQuery(self)
4274 class LUNodeRemove(LogicalUnit):
4275 """Logical unit for removing a node.
4278 HPATH = "node-remove"
4279 HTYPE = constants.HTYPE_NODE
4281 def BuildHooksEnv(self):
4282 """Build hooks env.
4284 This doesn't run on the target node in the pre phase as a failed
4285 node would then be impossible to remove.
4289 "OP_TARGET": self.op.node_name,
4290 "NODE_NAME": self.op.node_name,
4293 def BuildHooksNodes(self):
4294 """Build hooks nodes.
4297 all_nodes = self.cfg.GetNodeList()
4298 try:
4299 all_nodes.remove(self.op.node_name)
4300 except ValueError:
4301 logging.warning("Node '%s', which is about to be removed, was not found"
4302 " in the list of all nodes", self.op.node_name)
4303 return (all_nodes, all_nodes)
4305 def CheckPrereq(self):
4306 """Check prerequisites.
4308 This checks:
4309 - the node exists in the configuration
4310 - it does not have primary or secondary instances
4311 - it's not the master
4313 Any errors are signaled by raising errors.OpPrereqError.
4316 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4317 node = self.cfg.GetNodeInfo(self.op.node_name)
4318 assert node is not None
4320 masternode = self.cfg.GetMasterNode()
4321 if node.name == masternode:
4322 raise errors.OpPrereqError("Node is the master node, failover to another"
4323 " node is required", errors.ECODE_INVAL)
4325 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4326 if node.name in instance.all_nodes:
4327 raise errors.OpPrereqError("Instance %s is still running on the node,"
4328 " please remove first" % instance_name,
4330 self.op.node_name = node.name
4331 self.node = node
4333 def Exec(self, feedback_fn):
4334 """Removes the node from the cluster.
4338 logging.info("Stopping the node daemon and removing configs from node %s",
4341 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4343 # Promote nodes to master candidate as needed
4344 _AdjustCandidatePool(self, exceptions=[node.name])
4345 self.context.RemoveNode(node.name)
4347 # Run post hooks on the node before it's removed
4348 _RunPostHook(self, node.name)
4350 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4351 msg = result.fail_msg
4352 if msg:
4353 self.LogWarning("Errors encountered on the remote node while leaving"
4354 " the cluster: %s", msg)
4356 # Remove node from our /etc/hosts
4357 if self.cfg.GetClusterInfo().modify_etc_hosts:
4358 master_node = self.cfg.GetMasterNode()
4359 result = self.rpc.call_etc_hosts_modify(master_node,
4360 constants.ETC_HOSTS_REMOVE,
4361 node.name, None)
4362 result.Raise("Can't update hosts file with new host data")
4363 _RedistributeAncillaryFiles(self)
4366 class _NodeQuery(_QueryBase):
4367 FIELDS = query.NODE_FIELDS
4369 def ExpandNames(self, lu):
4370 lu.needed_locks = {}
4371 lu.share_locks = _ShareAll()
4373 if self.names:
4374 self.wanted = _GetWantedNodes(lu, self.names)
4375 else:
4376 self.wanted = locking.ALL_SET
4378 self.do_locking = (self.use_locking and
4379 query.NQ_LIVE in self.requested_data)
4381 if self.do_locking:
4382 # If any non-static field is requested we need to lock the nodes
4383 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4385 def DeclareLocks(self, lu, level):
4386 pass
4388 def _GetQueryData(self, lu):
4389 """Computes the list of nodes and their attributes.
4392 all_info = lu.cfg.GetAllNodesInfo()
4394 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4396 # Gather data as requested
4397 if query.NQ_LIVE in self.requested_data:
4398 # filter out non-vm_capable nodes
4399 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4401 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4402 lu.cfg.GetHypervisorType())
4403 live_data = dict((name, nresult.payload)
4404 for (name, nresult) in node_data.items()
4405 if not nresult.fail_msg and nresult.payload)
4406 else:
4407 live_data = None
4409 if query.NQ_INST in self.requested_data:
4410 node_to_primary = dict([(name, set()) for name in nodenames])
4411 node_to_secondary = dict([(name, set()) for name in nodenames])
4413 inst_data = lu.cfg.GetAllInstancesInfo()
4415 for inst in inst_data.values():
4416 if inst.primary_node in node_to_primary:
4417 node_to_primary[inst.primary_node].add(inst.name)
4418 for secnode in inst.secondary_nodes:
4419 if secnode in node_to_secondary:
4420 node_to_secondary[secnode].add(inst.name)
4421 else:
4422 node_to_primary = None
4423 node_to_secondary = None
4425 if query.NQ_OOB in self.requested_data:
4426 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4427 for name, node in all_info.iteritems())
4428 else:
4429 oob_support = None
4431 if query.NQ_GROUP in self.requested_data:
4432 groups = lu.cfg.GetAllNodeGroupsInfo()
4433 else:
4434 groups = {}
4436 return query.NodeQueryData([all_info[name] for name in nodenames],
4437 live_data, lu.cfg.GetMasterNode(),
4438 node_to_primary, node_to_secondary, groups,
4439 oob_support, lu.cfg.GetClusterInfo())
4442 class LUNodeQuery(NoHooksLU):
4443 """Logical unit for querying nodes.
4446 # pylint: disable=W0142
4449 def CheckArguments(self):
4450 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4451 self.op.output_fields, self.op.use_locking)
4453 def ExpandNames(self):
4454 self.nq.ExpandNames(self)
4456 def Exec(self, feedback_fn):
4457 return self.nq.OldStyleQuery(self)
4460 class LUNodeQueryvols(NoHooksLU):
4461 """Logical unit for getting volumes on node(s).
4465 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4466 _FIELDS_STATIC = utils.FieldSet("node")
4468 def CheckArguments(self):
4469 _CheckOutputFields(static=self._FIELDS_STATIC,
4470 dynamic=self._FIELDS_DYNAMIC,
4471 selected=self.op.output_fields)
4473 def ExpandNames(self):
4474 self.needed_locks = {}
4475 self.share_locks[locking.LEVEL_NODE] = 1
4476 if not self.op.nodes:
4477 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4478 else:
4479 self.needed_locks[locking.LEVEL_NODE] = \
4480 _GetWantedNodes(self, self.op.nodes)
4482 def Exec(self, feedback_fn):
4483 """Computes the list of nodes and their attributes.
4486 nodenames = self.owned_locks(locking.LEVEL_NODE)
4487 volumes = self.rpc.call_node_volumes(nodenames)
4489 ilist = self.cfg.GetAllInstancesInfo()
4490 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4492 output = []
4493 for node in nodenames:
4494 nresult = volumes[node]
4495 if nresult.offline:
4496 continue
4497 msg = nresult.fail_msg
4498 if msg:
4499 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4500 continue
4502 node_vols = sorted(nresult.payload,
4503 key=operator.itemgetter("dev"))
4505 for vol in node_vols:
4506 node_output = []
4507 for field in self.op.output_fields:
4508 if field == "node":
4509 val = node
4510 elif field == "phys":
4511 val = vol["dev"]
4512 elif field == "vg":
4513 val = vol["vg"]
4514 elif field == "name":
4515 val = vol["name"]
4516 elif field == "size":
4517 val = int(float(vol["size"]))
4518 elif field == "instance":
4519 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4520 else:
4521 raise errors.ParameterError(field)
4522 node_output.append(str(val))
4524 output.append(node_output)
4526 return output
4529 class LUNodeQueryStorage(NoHooksLU):
4530 """Logical unit for getting information on storage units on node(s).
4533 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4536 def CheckArguments(self):
4537 _CheckOutputFields(static=self._FIELDS_STATIC,
4538 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4539 selected=self.op.output_fields)
4541 def ExpandNames(self):
4542 self.needed_locks = {}
4543 self.share_locks[locking.LEVEL_NODE] = 1
4545 if self.op.nodes:
4546 self.needed_locks[locking.LEVEL_NODE] = \
4547 _GetWantedNodes(self, self.op.nodes)
4548 else:
4549 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4551 def Exec(self, feedback_fn):
4552 """Computes the list of nodes and their attributes.
4555 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4557 # Always get name to sort by
4558 if constants.SF_NAME in self.op.output_fields:
4559 fields = self.op.output_fields[:]
4560 else:
4561 fields = [constants.SF_NAME] + self.op.output_fields
4563 # Never ask for node or type as it's only known to the LU
4564 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4565 while extra in fields:
4566 fields.remove(extra)
4568 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4569 name_idx = field_idx[constants.SF_NAME]
4571 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4572 data = self.rpc.call_storage_list(self.nodes,
4573 self.op.storage_type, st_args,
4574 self.op.name, fields)
4576 result = []
4578 for node in utils.NiceSort(self.nodes):
4579 nresult = data[node]
4580 if nresult.offline:
4581 continue
4583 msg = nresult.fail_msg
4584 if msg:
4585 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4586 continue
4588 rows = dict([(row[name_idx], row) for row in nresult.payload])
4590 for name in utils.NiceSort(rows.keys()):
4591 row = rows[name]
4593 out = []
4595 for field in self.op.output_fields:
4596 if field == constants.SF_NODE:
4597 val = node
4598 elif field == constants.SF_TYPE:
4599 val = self.op.storage_type
4600 elif field in field_idx:
4601 val = row[field_idx[field]]
4602 else:
4603 raise errors.ParameterError(field)
4605 out.append(str(val))
4607 result.append(out)
4609 return result
4612 class _InstanceQuery(_QueryBase):
4613 FIELDS = query.INSTANCE_FIELDS
4615 def ExpandNames(self, lu):
4616 lu.needed_locks = {}
4617 lu.share_locks = _ShareAll()
4619 if self.names:
4620 self.wanted = _GetWantedInstances(lu, self.names)
4621 else:
4622 self.wanted = locking.ALL_SET
4624 self.do_locking = (self.use_locking and
4625 query.IQ_LIVE in self.requested_data)
4626 if self.do_locking:
4627 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4628 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4629 lu.needed_locks[locking.LEVEL_NODE] = []
4630 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4632 self.do_grouplocks = (self.do_locking and
4633 query.IQ_NODES in self.requested_data)
4635 def DeclareLocks(self, lu, level):
4637 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4638 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4640 # Lock all groups used by instances optimistically; this requires going
4641 # via the node before it's locked, requiring verification later on
4642 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4643 frozenset(group_uuid
4644 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4645 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4646 elif level == locking.LEVEL_NODE:
4647 lu._LockInstancesNodes() # pylint: disable=W0212
4649 @staticmethod
4650 def _CheckGroupLocks(lu):
4651 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4652 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4654 # Check if node groups for locked instances are still correct
4655 for instance_name in owned_instances:
4656 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4658 def _GetQueryData(self, lu):
4659 """Computes the list of instances and their attributes.
4662 if self.do_grouplocks:
4663 self._CheckGroupLocks(lu)
4665 cluster = lu.cfg.GetClusterInfo()
4666 all_info = lu.cfg.GetAllInstancesInfo()
4668 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4670 instance_list = [all_info[name] for name in instance_names]
4671 nodes = frozenset(itertools.chain(*(inst.all_nodes
4672 for inst in instance_list)))
4673 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4674 bad_nodes = []
4675 offline_nodes = []
4676 wrongnode_inst = set()
4678 # Gather data as requested
4679 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4680 live_data = {}
4681 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4682 for name in nodes:
4683 result = node_data[name]
4684 if result.offline:
4685 # offline nodes will be in both lists
4686 assert result.fail_msg
4687 offline_nodes.append(name)
4688 if result.fail_msg:
4689 bad_nodes.append(name)
4690 elif result.payload:
4691 for inst in result.payload:
4692 if inst in all_info:
4693 if all_info[inst].primary_node == name:
4694 live_data.update(result.payload)
4695 else:
4696 wrongnode_inst.add(inst)
4698 # orphan instance; we don't list it here as we don't
4699 # handle this case yet in the output of instance listing
4700 logging.warning("Orphan instance '%s' found on node %s",
4702 # else no instance is alive
4706 if query.IQ_DISKUSAGE in self.requested_data:
4707 disk_usage = dict((inst.name,
4708 _ComputeDiskSize(inst.disk_template,
4709 [{constants.IDISK_SIZE: disk.size}
4710 for disk in inst.disks]))
4711 for inst in instance_list)
4712 else:
4713 disk_usage = None
4715 if query.IQ_CONSOLE in self.requested_data:
4716 consinfo = {}
4717 for inst in instance_list:
4718 if inst.name in live_data:
4719 # Instance is running
4720 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4721 else:
4722 consinfo[inst.name] = None
4723 assert set(consinfo.keys()) == set(instance_names)
4724 else:
4725 consinfo = None
4727 if query.IQ_NODES in self.requested_data:
4728 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4729 instance_list)))
4730 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4731 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4732 for uuid in set(map(operator.attrgetter("group"),
4733 nodes.values())))
4734 else:
4735 nodes = None
4736 groups = None
4738 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4739 disk_usage, offline_nodes, bad_nodes,
4740 live_data, wrongnode_inst, consinfo,
4741 nodes, groups)
4744 class LUQuery(NoHooksLU):
4745 """Query for resources/items of a certain kind.
4748 # pylint: disable=W0142
4751 def CheckArguments(self):
4752 qcls = _GetQueryImplementation(self.op.what)
4754 self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)
4756 def ExpandNames(self):
4757 self.impl.ExpandNames(self)
4759 def DeclareLocks(self, level):
4760 self.impl.DeclareLocks(self, level)
4762 def Exec(self, feedback_fn):
4763 return self.impl.NewStyleQuery(self)
4766 class LUQueryFields(NoHooksLU):
4767 """Query for resources/items of a certain kind.
4770 # pylint: disable=W0142
4773 def CheckArguments(self):
4774 self.qcls = _GetQueryImplementation(self.op.what)
4776 def ExpandNames(self):
4777 self.needed_locks = {}
4779 def Exec(self, feedback_fn):
4780 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4783 class LUNodeModifyStorage(NoHooksLU):
4784 """Logical unit for modifying a storage volume on a node.
4789 def CheckArguments(self):
4790 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4792 storage_type = self.op.storage_type
4794 try:
4795 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4796 except KeyError:
4797 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4798 " modified" % storage_type,
4799 errors.ECODE_INVAL)
4801 diff = set(self.op.changes.keys()) - modifiable
4802 if diff:
4803 raise errors.OpPrereqError("The following fields can not be modified for"
4804 " storage units of type '%s': %r" %
4805 (storage_type, list(diff)),
4806 errors.ECODE_INVAL)
4808 def ExpandNames(self):
4809 self.needed_locks = {
4810 locking.LEVEL_NODE: self.op.node_name,
4811 }
4813 def Exec(self, feedback_fn):
4814 """Computes the list of nodes and their attributes.
4817 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4818 result = self.rpc.call_storage_modify(self.op.node_name,
4819 self.op.storage_type, st_args,
4820 self.op.name, self.op.changes)
4821 result.Raise("Failed to modify storage unit '%s' on %s" %
4822 (self.op.name, self.op.node_name))
4825 class LUNodeAdd(LogicalUnit):
4826 """Logical unit for adding node to the cluster.
4829 HPATH = "node-add"
4830 HTYPE = constants.HTYPE_NODE
4831 _NFLAGS = ["master_capable", "vm_capable"]
4833 def CheckArguments(self):
4834 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4835 # validate/normalize the node name
4836 self.hostname = netutils.GetHostname(name=self.op.node_name,
4837 family=self.primary_ip_family)
4838 self.op.node_name = self.hostname.name
4840 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4841 raise errors.OpPrereqError("Cannot readd the master node",
4844 if self.op.readd and self.op.group:
4845 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4846 " being readded", errors.ECODE_INVAL)
4848 def BuildHooksEnv(self):
4849 """Build hooks env.
4851 This will run on all nodes before, and on all nodes + the new node after.
4855 "OP_TARGET": self.op.node_name,
4856 "NODE_NAME": self.op.node_name,
4857 "NODE_PIP": self.op.primary_ip,
4858 "NODE_SIP": self.op.secondary_ip,
4859 "MASTER_CAPABLE": str(self.op.master_capable),
4860 "VM_CAPABLE": str(self.op.vm_capable),
4863 def BuildHooksNodes(self):
4864 """Build hooks nodes.
4867 # Exclude added node
4868 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4869 post_nodes = pre_nodes + [self.op.node_name, ]
4871 return (pre_nodes, post_nodes)
4873 def CheckPrereq(self):
4874 """Check prerequisites.
4876 This checks:
4877 - the new node is not already in the config
4878 - it is resolvable
4879 - its parameters (single/dual homed) matches the cluster
4881 Any errors are signaled by raising errors.OpPrereqError.
4884 cfg = self.cfg
4885 hostname = self.hostname
4886 node = hostname.name
4887 primary_ip = self.op.primary_ip = hostname.ip
4888 if self.op.secondary_ip is None:
4889 if self.primary_ip_family == netutils.IP6Address.family:
4890 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4891 " IPv4 address must be given as secondary",
4892 errors.ECODE_INVAL)
4893 self.op.secondary_ip = primary_ip
4895 secondary_ip = self.op.secondary_ip
4896 if not netutils.IP4Address.IsValid(secondary_ip):
4897 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4898 " address" % secondary_ip, errors.ECODE_INVAL)
4900 node_list = cfg.GetNodeList()
4901 if not self.op.readd and node in node_list:
4902 raise errors.OpPrereqError("Node %s is already in the configuration" %
4903 node, errors.ECODE_EXISTS)
4904 elif self.op.readd and node not in node_list:
4905 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4908 self.changed_primary_ip = False
4910 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4911 if self.op.readd and node == existing_node_name:
4912 if existing_node.secondary_ip != secondary_ip:
4913 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4914 " address configuration as before",
4916 if existing_node.primary_ip != primary_ip:
4917 self.changed_primary_ip = True
4921 if (existing_node.primary_ip == primary_ip or
4922 existing_node.secondary_ip == primary_ip or
4923 existing_node.primary_ip == secondary_ip or
4924 existing_node.secondary_ip == secondary_ip):
4925 raise errors.OpPrereqError("New node ip address(es) conflict with"
4926 " existing node %s" % existing_node.name,
4927 errors.ECODE_NOTUNIQUE)
4929 # After this 'if' block, None is no longer a valid value for the
4930 # _capable op attributes
4931 if self.op.readd:
4932 old_node = self.cfg.GetNodeInfo(node)
4933 assert old_node is not None, "Can't retrieve locked node %s" % node
4934 for attr in self._NFLAGS:
4935 if getattr(self.op, attr) is None:
4936 setattr(self.op, attr, getattr(old_node, attr))
4937 else:
4938 for attr in self._NFLAGS:
4939 if getattr(self.op, attr) is None:
4940 setattr(self.op, attr, True)
4942 if self.op.readd and not self.op.vm_capable:
4943 pri, sec = cfg.GetNodeInstances(node)
4944 if pri or sec:
4945 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4946 " flag set to false, but it already holds"
4947 " instances" % node,
4948 errors.ECODE_INVAL)
4950 # check that the type of the node (single versus dual homed) is the
4951 # same as for the master
4952 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4953 master_singlehomed = myself.secondary_ip == myself.primary_ip
4954 newbie_singlehomed = secondary_ip == primary_ip
4955 if master_singlehomed != newbie_singlehomed:
4956 if master_singlehomed:
4957 raise errors.OpPrereqError("The master has no secondary ip but the"
4958 " new node has one",
4961 raise errors.OpPrereqError("The master has a secondary ip but the"
4962 " new node doesn't have one",
4965 # checks reachability
4966 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4967 raise errors.OpPrereqError("Node not reachable by ping",
4968 errors.ECODE_ENVIRON)
4970 if not newbie_singlehomed:
4971 # check reachability from my secondary ip to newbie's secondary ip
4972 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4973 source=myself.secondary_ip):
4974 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4975 " based ping to node daemon port",
4976 errors.ECODE_ENVIRON)
4978 if self.op.readd:
4979 exceptions = [node]
4980 else:
4981 exceptions = []
4983 if self.op.master_capable:
4984 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4985 else:
4986 self.master_candidate = False
4988 if self.op.readd:
4989 self.new_node = old_node
4990 else:
4991 node_group = cfg.LookupNodeGroup(self.op.group)
4992 self.new_node = objects.Node(name=node,
4993 primary_ip=primary_ip,
4994 secondary_ip=secondary_ip,
4995 master_candidate=self.master_candidate,
4996 offline=False, drained=False,
4997 group=node_group)
4999 if self.op.ndparams:
5000 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5002 def Exec(self, feedback_fn):
5003 """Adds the new node to the cluster.
5006 new_node = self.new_node
5007 node = new_node.name
5009 # We are adding a new node, so we assume it is powered
5010 new_node.powered = True
5012 # for re-adds, reset the offline/drained/master-candidate flags;
5013 # we need to reset here, otherwise offline would prevent RPC calls
5014 # later in the procedure; this also means that if the re-add
5015 # fails, we are left with a non-offlined, broken node
5016 if self.op.readd:
5017 new_node.drained = new_node.offline = False # pylint: disable=W0201
5018 self.LogInfo("Readding a node, the offline/drained flags were reset")
5019 # if we demote the node, we do cleanup later in the procedure
5020 new_node.master_candidate = self.master_candidate
5021 if self.changed_primary_ip:
5022 new_node.primary_ip = self.op.primary_ip
5024 # copy the master/vm_capable flags
5025 for attr in self._NFLAGS:
5026 setattr(new_node, attr, getattr(self.op, attr))
5028 # notify the user about any possible mc promotion
5029 if new_node.master_candidate:
5030 self.LogInfo("Node will be a master candidate")
5032 if self.op.ndparams:
5033 new_node.ndparams = self.op.ndparams
5035 new_node.ndparams = {}
5037 # check connectivity
5038 result = self.rpc.call_version([node])[node]
5039 result.Raise("Can't get version information from node %s" % node)
5040 if constants.PROTOCOL_VERSION == result.payload:
5041 logging.info("Communication to node %s fine, sw version %s match",
5042 node, result.payload)
5044 raise errors.OpExecError("Version mismatch master version %s,"
5045 " node version %s" %
5046 (constants.PROTOCOL_VERSION, result.payload))
5048 # Add node to our /etc/hosts, and add key to known_hosts
5049 if self.cfg.GetClusterInfo().modify_etc_hosts:
5050 master_node = self.cfg.GetMasterNode()
5051 result = self.rpc.call_etc_hosts_modify(master_node,
5052 constants.ETC_HOSTS_ADD,
5053 self.hostname.name, self.hostname.ip)
5055 result.Raise("Can't update hosts file with new host data")
5057 if new_node.secondary_ip != new_node.primary_ip:
5058 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5059 False)
5061 node_verify_list = [self.cfg.GetMasterNode()]
5062 node_verify_param = {
5063 constants.NV_NODELIST: ([node], {}),
5064 # TODO: do a node-net-test as well?
5065 }
5067 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5068 self.cfg.GetClusterName())
5069 for verifier in node_verify_list:
5070 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5071 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5072 if nl_payload:
5073 for failed in nl_payload:
5074 feedback_fn("ssh/hostname verification failed"
5075 " (checking from %s): %s" %
5076 (verifier, nl_payload[failed]))
5077 raise errors.OpExecError("ssh/hostname verification failed")
5079 if self.op.readd:
5080 _RedistributeAncillaryFiles(self)
5081 self.context.ReaddNode(new_node)
5082 # make sure we redistribute the config
5083 self.cfg.Update(new_node, feedback_fn)
5084 # and make sure the new node will not have old files around
5085 if not new_node.master_candidate:
5086 result = self.rpc.call_node_demote_from_mc(new_node.name)
5087 msg = result.fail_msg
5088 if msg:
5089 self.LogWarning("Node failed to demote itself from master"
5090 " candidate status: %s" % msg)
5091 else:
5092 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5093 additional_vm=self.op.vm_capable)
5094 self.context.AddNode(new_node, self.proc.GetECId())
5097 class LUNodeSetParams(LogicalUnit):
5098 """Modifies the parameters of a node.
5100 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5101 to the node role (as _ROLE_*)
5102 @cvar _R2F: a dictionary from node role to tuples of flags
5103 @cvar _FLAGS: a list of attribute names corresponding to the flags
5106 HPATH = "node-modify"
5107 HTYPE = constants.HTYPE_NODE
5109 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5110 _F2R = {
5111 (True, False, False): _ROLE_CANDIDATE,
5112 (False, True, False): _ROLE_DRAINED,
5113 (False, False, True): _ROLE_OFFLINE,
5114 (False, False, False): _ROLE_REGULAR,
5115 }
5116 _R2F = dict((v, k) for k, v in _F2R.items())
5117 _FLAGS = ["master_candidate", "drained", "offline"]
5119 def CheckArguments(self):
5120 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5121 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5122 self.op.master_capable, self.op.vm_capable,
5123 self.op.secondary_ip, self.op.ndparams]
5124 if all_mods.count(None) == len(all_mods):
5125 raise errors.OpPrereqError("Please pass at least one modification",
5127 if all_mods.count(True) > 1:
5128 raise errors.OpPrereqError("Can't set the node into more than one"
5129 " state at the same time",
5132 # Boolean value that tells us whether we might be demoting from MC
5133 self.might_demote = (self.op.master_candidate == False or
5134 self.op.offline == True or
5135 self.op.drained == True or
5136 self.op.master_capable == False)
5138 if self.op.secondary_ip:
5139 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5140 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5141 " address" % self.op.secondary_ip,
5144 self.lock_all = self.op.auto_promote and self.might_demote
5145 self.lock_instances = self.op.secondary_ip is not None
5147 def ExpandNames(self):
5148 if self.lock_all:
5149 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5150 else:
5151 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5153 if self.lock_instances:
5154 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5156 def DeclareLocks(self, level):
5157 # If we have locked all instances, before waiting to lock nodes, release
5158 # all the ones living on nodes unrelated to the current operation.
5159 if level == locking.LEVEL_NODE and self.lock_instances:
5160 self.affected_instances = []
5161 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5162 instances_keep = []
5164 # Build list of instances to release
5165 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5166 for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5167 if (instance.disk_template in constants.DTS_INT_MIRROR and
5168 self.op.node_name in instance.all_nodes):
5169 instances_keep.append(instance_name)
5170 self.affected_instances.append(instance)
5172 _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5174 assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5175 set(instances_keep))
5177 def BuildHooksEnv(self):
5178 """Build hooks env.
5180 This runs on the master node.
5184 "OP_TARGET": self.op.node_name,
5185 "MASTER_CANDIDATE": str(self.op.master_candidate),
5186 "OFFLINE": str(self.op.offline),
5187 "DRAINED": str(self.op.drained),
5188 "MASTER_CAPABLE": str(self.op.master_capable),
5189 "VM_CAPABLE": str(self.op.vm_capable),
5192 def BuildHooksNodes(self):
5193 """Build hooks nodes.
5196 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5197 return (nl, nl)
5199 def CheckPrereq(self):
5200 """Check prerequisites.
5202 This only checks the instance list against the existing names.
5205 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5207 if (self.op.master_candidate is not None or
5208 self.op.drained is not None or
5209 self.op.offline is not None):
5210 # we can't change the master's node flags
5211 if self.op.node_name == self.cfg.GetMasterNode():
5212 raise errors.OpPrereqError("The master role can be changed"
5213 " only via master-failover",
5216 if self.op.master_candidate and not node.master_capable:
5217 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5218 " it a master candidate" % node.name,
5221 if self.op.vm_capable == False:
5222 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5224 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5225 " the vm_capable flag" % node.name,
5228 if node.master_candidate and self.might_demote and not self.lock_all:
5229 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5230 # check if after removing the current node, we're missing master
5231 # candidates
5232 (mc_remaining, mc_should, _) = \
5233 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5234 if mc_remaining < mc_should:
5235 raise errors.OpPrereqError("Not enough master candidates, please"
5236 " pass auto promote option to allow"
5237 " promotion", errors.ECODE_STATE)
5239 self.old_flags = old_flags = (node.master_candidate,
5240 node.drained, node.offline)
5241 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5242 self.old_role = old_role = self._F2R[old_flags]
5244 # Check for ineffective changes
5245 for attr in self._FLAGS:
5246 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5247 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5248 setattr(self.op, attr, None)
5250 # Past this point, any flag change to False means a transition
5251 # away from the respective state, as only real changes are kept
5253 # TODO: We might query the real power state if it supports OOB
5254 if _SupportsOob(self.cfg, node):
5255 if self.op.offline is False and not (node.powered or
5256 self.op.powered == True):
5257 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5258 " offline status can be reset") %
5260 elif self.op.powered is not None:
5261 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5262 " as it does not support out-of-band"
5263 " handling") % self.op.node_name)
5265 # If we're being deofflined/drained, we'll MC ourself if needed
5266 if (self.op.drained == False or self.op.offline == False or
5267 (self.op.master_capable and not node.master_capable)):
5268 if _DecideSelfPromotion(self):
5269 self.op.master_candidate = True
5270 self.LogInfo("Auto-promoting node to master candidate")
5272 # If we're no longer master capable, we'll demote ourselves from MC
5273 if self.op.master_capable == False and node.master_candidate:
5274 self.LogInfo("Demoting from master candidate")
5275 self.op.master_candidate = False
5278 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5279 if self.op.master_candidate:
5280 new_role = self._ROLE_CANDIDATE
5281 elif self.op.drained:
5282 new_role = self._ROLE_DRAINED
5283 elif self.op.offline:
5284 new_role = self._ROLE_OFFLINE
5285 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5286 # False is still in new flags, which means we're un-setting (the
5287 # old value)
5288 new_role = self._ROLE_REGULAR
5289 else: # no new flags, nothing, keep old role
5290 new_role = old_role
5292 self.new_role = new_role
5294 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5295 # Trying to transition out of offline status
5296 result = self.rpc.call_version([node.name])[node.name]
5298 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5299 " to report its version: %s" %
5300 (node.name, result.fail_msg),
5303 self.LogWarning("Transitioning node from offline to online state"
5304 " without using re-add. Please make sure the node"
5307 if self.op.secondary_ip:
5308 # Ok even without locking, because this can't be changed by any LU
5309 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5310 master_singlehomed = master.secondary_ip == master.primary_ip
5311 if master_singlehomed and self.op.secondary_ip:
5312 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5313 " homed cluster", errors.ECODE_INVAL)
5315 if node.offline:
5316 if self.affected_instances:
5317 raise errors.OpPrereqError("Cannot change secondary ip: offline"
5318 " node has instances (%s) configured"
5319 " to use it" % self.affected_instances)
5320 else:
5321 # On online nodes, check that no instances are running, and that
5322 # the node has the new ip and we can reach it.
5323 for instance in self.affected_instances:
5324 _CheckInstanceDown(self, instance, "cannot change secondary ip")
5326 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5327 if master.name != node.name:
5328 # check reachability from master secondary ip to new secondary ip
5329 if not netutils.TcpPing(self.op.secondary_ip,
5330 constants.DEFAULT_NODED_PORT,
5331 source=master.secondary_ip):
5332 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5333 " based ping to node daemon port",
5334 errors.ECODE_ENVIRON)
5336 if self.op.ndparams:
5337 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5338 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5339 self.new_ndparams = new_ndparams
5341 def Exec(self, feedback_fn):
5342 """Modifies a node.
5344 """
5345 node = self.node
5346 old_role = self.old_role
5347 new_role = self.new_role
5349 result = []
5351 if self.op.ndparams:
5352 node.ndparams = self.new_ndparams
5354 if self.op.powered is not None:
5355 node.powered = self.op.powered
5357 for attr in ["master_capable", "vm_capable"]:
5358 val = getattr(self.op, attr)
5359 if val is not None:
5360 setattr(node, attr, val)
5361 result.append((attr, str(val)))
5363 if new_role != old_role:
5364 # Tell the node to demote itself, if no longer MC and not offline
5365 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5366 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5367 if msg:
5368 self.LogWarning("Node failed to demote itself: %s", msg)
5370 new_flags = self._R2F[new_role]
5371 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5372 if of != nf:
5373 result.append((desc, str(nf)))
5374 (node.master_candidate, node.drained, node.offline) = new_flags
5376 # we locked all nodes, we adjust the CP before updating this node
5377 if self.lock_all:
5378 _AdjustCandidatePool(self, [node.name])
5380 if self.op.secondary_ip:
5381 node.secondary_ip = self.op.secondary_ip
5382 result.append(("secondary_ip", self.op.secondary_ip))
5384 # this will trigger configuration file update, if needed
5385 self.cfg.Update(node, feedback_fn)
5387 # this will trigger job queue propagation or cleanup if the mc
5388 # flag changed
5389 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5390 self.context.ReaddNode(node)
5392 return result
5395 class LUNodePowercycle(NoHooksLU):
5396 """Powercycles a node.
5401 def CheckArguments(self):
5402 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5403 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5404 raise errors.OpPrereqError("The node is the master and the force"
5405 " parameter was not set",
5408 def ExpandNames(self):
5409 """Locking for PowercycleNode.
5411 This is a last-resort option and shouldn't block on other
5412 jobs. Therefore, we grab no locks.
5415 self.needed_locks = {}
5417 def Exec(self, feedback_fn):
5418 """Reboots a node.
5420 """
5421 result = self.rpc.call_node_powercycle(self.op.node_name,
5422 self.cfg.GetHypervisorType())
5423 result.Raise("Failed to schedule the reboot")
5424 return result.payload
5427 class LUClusterQuery(NoHooksLU):
5428 """Query cluster configuration.
5433 def ExpandNames(self):
5434 self.needed_locks = {}
5436 def Exec(self, feedback_fn):
5437 """Return cluster config.
5440 cluster = self.cfg.GetClusterInfo()
5442 os_hvp = {}
5443 # Filter just for enabled hypervisors
5444 for os_name, hv_dict in cluster.os_hvp.items():
5445 os_hvp[os_name] = {}
5446 for hv_name, hv_params in hv_dict.items():
5447 if hv_name in cluster.enabled_hypervisors:
5448 os_hvp[os_name][hv_name] = hv_params
5450 # Convert ip_family to ip_version
5451 primary_ip_version = constants.IP4_VERSION
5452 if cluster.primary_ip_family == netutils.IP6Address.family:
5453 primary_ip_version = constants.IP6_VERSION
5456 "software_version": constants.RELEASE_VERSION,
5457 "protocol_version": constants.PROTOCOL_VERSION,
5458 "config_version": constants.CONFIG_VERSION,
5459 "os_api_version": max(constants.OS_API_VERSIONS),
5460 "export_version": constants.EXPORT_VERSION,
5461 "architecture": (platform.architecture()[0], platform.machine()),
5462 "name": cluster.cluster_name,
5463 "master": cluster.master_node,
5464 "default_hypervisor": cluster.enabled_hypervisors[0],
5465 "enabled_hypervisors": cluster.enabled_hypervisors,
5466 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5467 for hypervisor_name in cluster.enabled_hypervisors]),
5469 "beparams": cluster.beparams,
5470 "osparams": cluster.osparams,
5471 "nicparams": cluster.nicparams,
5472 "ndparams": cluster.ndparams,
5473 "candidate_pool_size": cluster.candidate_pool_size,
5474 "master_netdev": cluster.master_netdev,
5475 "volume_group_name": cluster.volume_group_name,
5476 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5477 "file_storage_dir": cluster.file_storage_dir,
5478 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5479 "maintain_node_health": cluster.maintain_node_health,
5480 "ctime": cluster.ctime,
5481 "mtime": cluster.mtime,
5482 "uuid": cluster.uuid,
5483 "tags": list(cluster.GetTags()),
5484 "uid_pool": cluster.uid_pool,
5485 "default_iallocator": cluster.default_iallocator,
5486 "reserved_lvs": cluster.reserved_lvs,
5487 "primary_ip_version": primary_ip_version,
5488 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5489 "hidden_os": cluster.hidden_os,
5490 "blacklisted_os": cluster.blacklisted_os,
5496 class LUClusterConfigQuery(NoHooksLU):
5497 """Return configuration values.
5501 _FIELDS_DYNAMIC = utils.FieldSet()
5502 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5503 "watcher_pause", "volume_group_name")
5505 def CheckArguments(self):
5506 _CheckOutputFields(static=self._FIELDS_STATIC,
5507 dynamic=self._FIELDS_DYNAMIC,
5508 selected=self.op.output_fields)
5510 def ExpandNames(self):
5511 self.needed_locks = {}
5513 def Exec(self, feedback_fn):
5514 """Dump a representation of the cluster config to the standard output.
5517 values = []
5518 for field in self.op.output_fields:
5519 if field == "cluster_name":
5520 entry = self.cfg.GetClusterName()
5521 elif field == "master_node":
5522 entry = self.cfg.GetMasterNode()
5523 elif field == "drain_flag":
5524 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5525 elif field == "watcher_pause":
5526 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5527 elif field == "volume_group_name":
5528 entry = self.cfg.GetVGName()
5530 raise errors.ParameterError(field)
5531 values.append(entry)
5533 return values
5535 class LUInstanceActivateDisks(NoHooksLU):
5536 """Bring up an instance's disks.
5541 def ExpandNames(self):
5542 self._ExpandAndLockInstance()
5543 self.needed_locks[locking.LEVEL_NODE] = []
5544 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5546 def DeclareLocks(self, level):
5547 if level == locking.LEVEL_NODE:
5548 self._LockInstancesNodes()
5550 def CheckPrereq(self):
5551 """Check prerequisites.
5553 This checks that the instance is in the cluster.
5556 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5557 assert self.instance is not None, \
5558 "Cannot retrieve locked instance %s" % self.op.instance_name
5559 _CheckNodeOnline(self, self.instance.primary_node)
5561 def Exec(self, feedback_fn):
5562 """Activate the disks.
5565 disks_ok, disks_info = \
5566 _AssembleInstanceDisks(self, self.instance,
5567 ignore_size=self.op.ignore_size)
5569 raise errors.OpExecError("Cannot activate block devices")
5574 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5575 ignore_size=False):
5576 """Prepare the block devices for an instance.
5578 This sets up the block devices on all nodes.
5580 @type lu: L{LogicalUnit}
5581 @param lu: the logical unit on whose behalf we execute
5582 @type instance: L{objects.Instance}
5583 @param instance: the instance for whose disks we assemble
5584 @type disks: list of L{objects.Disk} or None
5585 @param disks: which disks to assemble (or all, if None)
5586 @type ignore_secondaries: boolean
5587 @param ignore_secondaries: if true, errors on secondary nodes
5588 won't result in an error return from the function
5589 @type ignore_size: boolean
5590 @param ignore_size: if true, the current known size of the disk
5591 will not be used during the disk activation, useful for cases
5592 when the size is wrong
5593 @return: False if the operation failed, otherwise a list of
5594 (host, instance_visible_name, node_visible_name)
5595 with the mapping from node devices to instance devices
5598 device_info = []
5599 disks_ok = True
5600 iname = instance.name
5601 disks = _ExpandCheckDisks(instance, disks)
5603 # With the two passes mechanism we try to reduce the window of
5604 # opportunity for the race condition of switching DRBD to primary
5605 # before handshaking occurred, but we do not eliminate it
5607 # The proper fix would be to wait (with some limits) until the
5608 # connection has been made and drbd transitions from WFConnection
5609 # into any other network-connected state (Connected, SyncTarget,
5610 # SyncSource, etc.)
5612 # 1st pass, assemble on all nodes in secondary mode
5613 for idx, inst_disk in enumerate(disks):
5614 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5615 if ignore_size:
5616 node_disk = node_disk.Copy()
5617 node_disk.UnsetSize()
5618 lu.cfg.SetDiskID(node_disk, node)
5619 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5620 msg = result.fail_msg
5621 if msg:
5622 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5623 " (is_primary=False, pass=1): %s",
5624 inst_disk.iv_name, node, msg)
5625 if not ignore_secondaries:
5626 disks_ok = False
5628 # FIXME: race condition on drbd migration to primary
5630 # 2nd pass, do only the primary node
5631 for idx, inst_disk in enumerate(disks):
5632 dev_path = None
5634 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5635 if node != instance.primary_node:
5636 continue
5637 if ignore_size:
5638 node_disk = node_disk.Copy()
5639 node_disk.UnsetSize()
5640 lu.cfg.SetDiskID(node_disk, node)
5641 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5642 msg = result.fail_msg
5643 if msg:
5644 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5645 " (is_primary=True, pass=2): %s",
5646 inst_disk.iv_name, node, msg)
5647 disks_ok = False
5648 else:
5649 dev_path = result.payload
5651 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5653 # leave the disks configured for the primary node
5654 # this is a workaround that would be fixed better by
5655 # improving the logical/physical id handling
5656 for disk in disks:
5657 lu.cfg.SetDiskID(disk, instance.primary_node)
5659 return disks_ok, device_info
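# Illustrative sketch (an assumption about the usual calling pattern, not
# original code): callers treat a False "disks_ok" as fatal and clean up
# with _ShutdownInstanceDisks before raising, e.g.:
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     _ShutdownInstanceDisks(lu, instance)
#     raise errors.OpExecError("Cannot activate block devices")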
5662 def _StartInstanceDisks(lu, instance, force):
5663 """Start the disks of an instance.
5666 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5667 ignore_secondaries=force)
5669 _ShutdownInstanceDisks(lu, instance)
5670 if force is not None and not force:
5671 lu.proc.LogWarning("", hint="If the message above refers to a"
5673 " you can retry the operation using '--force'.")
5674 raise errors.OpExecError("Disk consistency error")
5677 class LUInstanceDeactivateDisks(NoHooksLU):
5678 """Shutdown an instance's disks.
5683 def ExpandNames(self):
5684 self._ExpandAndLockInstance()
5685 self.needed_locks[locking.LEVEL_NODE] = []
5686 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5688 def DeclareLocks(self, level):
5689 if level == locking.LEVEL_NODE:
5690 self._LockInstancesNodes()
5692 def CheckPrereq(self):
5693 """Check prerequisites.
5695 This checks that the instance is in the cluster.
5698 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5699 assert self.instance is not None, \
5700 "Cannot retrieve locked instance %s" % self.op.instance_name
5702 def Exec(self, feedback_fn):
5703 """Deactivate the disks
5706 instance = self.instance
5708 _ShutdownInstanceDisks(self, instance)
5710 _SafeShutdownInstanceDisks(self, instance)
5713 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5714 """Shutdown block devices of an instance.
5716 This function checks if an instance is running, before calling
5717 _ShutdownInstanceDisks.
5719 """
5720 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5721 _ShutdownInstanceDisks(lu, instance, disks=disks)
5724 def _ExpandCheckDisks(instance, disks):
5725 """Return the instance disks selected by the disks list
5727 @type disks: list of L{objects.Disk} or None
5728 @param disks: selected disks
5729 @rtype: list of L{objects.Disk}
5730 @return: selected instance disks to act on
5732 """
5733 if disks is None:
5734 return instance.disks
5735 else:
5736 if not set(disks).issubset(instance.disks):
5737 raise errors.ProgrammerError("Can only act on disks belonging to the"
5738 " target instance")
5739 return disks
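# Illustrative sketch (not original code): _ExpandCheckDisks normalizes an
# optional disk subset, e.g.:
#
#   disks = _ExpandCheckDisks(instance, None)               # all disks
#   disks = _ExpandCheckDisks(instance, instance.disks[:1]) # first disk only
#
# Passing disk objects not owned by the instance raises
# errors.ProgrammerError, since that would be a caller bug.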
5742 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5743 """Shutdown block devices of an instance.
5745 This does the shutdown on all nodes of the instance.
5747 If ignore_primary is true, errors on the primary node are
5748 ignored.
5750 """
5751 all_result = True
5752 disks = _ExpandCheckDisks(instance, disks)
5754 for disk in disks:
5755 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5756 lu.cfg.SetDiskID(top_disk, node)
5757 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5758 msg = result.fail_msg
5759 if msg:
5760 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5761 disk.iv_name, node, msg)
5762 if ((node == instance.primary_node and not ignore_primary) or
5763 (node != instance.primary_node and not result.offline)):
5764 all_result = False
5766 return all_result
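# Illustrative note (an assumption about intended use, not original code):
# ignore_primary=True is what failover needs, since the primary node may be
# dead and its shutdown errors must not abort the operation, e.g.:
#
#   if not _ShutdownInstanceDisks(lu, instance, ignore_primary=True):
#     raise errors.OpExecError("Can't shut down the instance's disks")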
5768 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5769 """Checks if a node has enough free memory.
5771 This function checks if a given node has the needed amount of free
5772 memory. In case the node has less memory or we cannot get the
5773 information from the node, this function raises an OpPrereqError
5774 exception.
5776 @type lu: C{LogicalUnit}
5777 @param lu: a logical unit from which we get configuration data
5778 @type node: C{str}
5779 @param node: the node to check
5780 @type reason: C{str}
5781 @param reason: string to use in the error message
5782 @type requested: C{int}
5783 @param requested: the amount of memory in MiB to check for
5784 @type hypervisor_name: C{str}
5785 @param hypervisor_name: the hypervisor to ask for memory stats
5786 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5787 we cannot check the node
5789 """
5790 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5791 nodeinfo[node].Raise("Can't get data from node %s" % node,
5792 prereq=True, ecode=errors.ECODE_ENVIRON)
5793 free_mem = nodeinfo[node].payload.get("memory_free", None)
5794 if not isinstance(free_mem, int):
5795 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5796 " was '%s'" % (node, free_mem),
5797 errors.ECODE_ENVIRON)
5798 if requested > free_mem:
5799 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5800 " needed %s MiB, available %s MiB" %
5801 (node, reason, requested, free_mem),
5802 errors.ECODE_NORES)
5805 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5806 """Checks if nodes have enough free disk space in the all VGs.
5808 This function checks if all given nodes have the needed amount of
5809 free disk. In case any node has less disk or we cannot get the
5810 information from the node, this function raises an OpPrereqError
5811 exception.
5813 @type lu: C{LogicalUnit}
5814 @param lu: a logical unit from which we get configuration data
5815 @type nodenames: C{list}
5816 @param nodenames: the list of node names to check
5817 @type req_sizes: C{dict}
5818 @param req_sizes: the hash of vg and corresponding amount of disk in
5819 MiB to check for
5820 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5821 or we cannot check the node
5823 """
5824 for vg, req_size in req_sizes.items():
5825 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
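# Illustrative sketch (hypothetical names and sizes): req_sizes maps each
# volume group to the total amount of disk, in MiB, about to be allocated
# there, e.g.:
#
#   req_sizes = {"xenvg": 10240, "fastvg": 2048}
#   _CheckNodesFreeDiskPerVG(lu, ["node1", "node2"], req_sizes)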
5828 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5829 """Checks if nodes have enough free disk space in the specified VG.
5831 This function checks if all given nodes have the needed amount of
5832 free disk. In case any node has less disk or we cannot get the
5833 information from the node, this function raises an OpPrereqError
5834 exception.
5836 @type lu: C{LogicalUnit}
5837 @param lu: a logical unit from which we get configuration data
5838 @type nodenames: C{list}
5839 @param nodenames: the list of node names to check
5840 @type vg: C{str}
5841 @param vg: the volume group to check
5842 @type requested: C{int}
5843 @param requested: the amount of disk in MiB to check for
5844 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5845 or we cannot check the node
5847 """
5848 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5849 for node in nodenames:
5850 info = nodeinfo[node]
5851 info.Raise("Cannot get current information from node %s" % node,
5852 prereq=True, ecode=errors.ECODE_ENVIRON)
5853 vg_free = info.payload.get("vg_free", None)
5854 if not isinstance(vg_free, int):
5855 raise errors.OpPrereqError("Can't compute free disk space on node"
5856 " %s for vg %s, result was '%s'" %
5857 (node, vg, vg_free), errors.ECODE_ENVIRON)
5858 if requested > vg_free:
5859 raise errors.OpPrereqError("Not enough disk space on target node %s"
5860 " vg %s: required %d MiB, available %d MiB" %
5861 (node, vg, requested, vg_free),
5862 errors.ECODE_NORES)
5865 class LUInstanceStartup(LogicalUnit):
5866 """Starts an instance.
5869 HPATH = "instance-start"
5870 HTYPE = constants.HTYPE_INSTANCE
5873 def CheckArguments(self):
5875 if self.op.beparams:
5876 # fill the beparams dict
5877 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5879 def ExpandNames(self):
5880 self._ExpandAndLockInstance()
5882 def BuildHooksEnv(self):
5883 """Build hooks env.
5885 This runs on master, primary and secondary nodes of the instance.
5887 """
5888 env = {
5889 "FORCE": self.op.force,
5890 }
5892 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5894 return env
5896 def BuildHooksNodes(self):
5897 """Build hooks nodes.
5900 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5903 def CheckPrereq(self):
5904 """Check prerequisites.
5906 This checks that the instance is in the cluster.
5909 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5910 assert self.instance is not None, \
5911 "Cannot retrieve locked instance %s" % self.op.instance_name
5914 if self.op.hvparams:
5915 # check hypervisor parameter syntax (locally)
5916 cluster = self.cfg.GetClusterInfo()
5917 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5918 filled_hvp = cluster.FillHV(instance)
5919 filled_hvp.update(self.op.hvparams)
5920 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5921 hv_type.CheckParameterSyntax(filled_hvp)
5922 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5924 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5926 if self.primary_offline and self.op.ignore_offline_nodes:
5927 self.proc.LogWarning("Ignoring offline primary node")
5929 if self.op.hvparams or self.op.beparams:
5930 self.proc.LogWarning("Overridden parameters are ignored")
5931 else:
5932 _CheckNodeOnline(self, instance.primary_node)
5934 bep = self.cfg.GetClusterInfo().FillBE(instance)
5936 # check bridges existence
5937 _CheckInstanceBridgesExist(self, instance)
5939 remote_info = self.rpc.call_instance_info(instance.primary_node,
5940 instance.name,
5941 instance.hypervisor)
5942 remote_info.Raise("Error checking node %s" % instance.primary_node,
5943 prereq=True, ecode=errors.ECODE_ENVIRON)
5944 if not remote_info.payload: # not running already
5945 _CheckNodeFreeMemory(self, instance.primary_node,
5946 "starting instance %s" % instance.name,
5947 bep[constants.BE_MEMORY], instance.hypervisor)
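# Illustrative sketch (not original code): the hvparams check above layers
# per-opcode overrides on top of the cluster-filled values before
# validating them, roughly:
#
#   filled_hvp = cluster.FillHV(instance)  # cluster + instance defaults
#   filled_hvp.update(self.op.hvparams)    # per-startup overrides win
#   hv_type.CheckParameterSyntax(filled_hvp)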
5949 def Exec(self, feedback_fn):
5950 """Start the instance.
5953 instance = self.instance
5954 force = self.op.force
5956 if not self.op.no_remember:
5957 self.cfg.MarkInstanceUp(instance.name)
5959 if self.primary_offline:
5960 assert self.op.ignore_offline_nodes
5961 self.proc.LogInfo("Primary node offline, marked instance as started")
5962 else:
5963 node_current = instance.primary_node
5965 _StartInstanceDisks(self, instance, force)
5967 result = self.rpc.call_instance_start(node_current, instance,
5968 self.op.hvparams, self.op.beparams,
5969 self.op.startup_paused)
5970 msg = result.fail_msg
5971 if msg:
5972 _ShutdownInstanceDisks(self, instance)
5973 raise errors.OpExecError("Could not start instance: %s" % msg)
5976 class LUInstanceReboot(LogicalUnit):
5977 """Reboot an instance.
5980 HPATH = "instance-reboot"
5981 HTYPE = constants.HTYPE_INSTANCE
5984 def ExpandNames(self):
5985 self._ExpandAndLockInstance()
5987 def BuildHooksEnv(self):
5988 """Build hooks env.
5990 This runs on master, primary and secondary nodes of the instance.
5992 """
5993 env = {
5994 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5995 "REBOOT_TYPE": self.op.reboot_type,
5996 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5997 }
5999 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6001 return env
6003 def BuildHooksNodes(self):
6004 """Build hooks nodes.
6007 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6010 def CheckPrereq(self):
6011 """Check prerequisites.
6013 This checks that the instance is in the cluster.
6016 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6017 assert self.instance is not None, \
6018 "Cannot retrieve locked instance %s" % self.op.instance_name
6020 _CheckNodeOnline(self, instance.primary_node)
6022 # check bridges existence
6023 _CheckInstanceBridgesExist(self, instance)
6025 def Exec(self, feedback_fn):
6026 """Reboot the instance.
6029 instance = self.instance
6030 ignore_secondaries = self.op.ignore_secondaries
6031 reboot_type = self.op.reboot_type
6033 remote_info = self.rpc.call_instance_info(instance.primary_node,
6034 instance.name,
6035 instance.hypervisor)
6036 remote_info.Raise("Error checking node %s" % instance.primary_node)
6037 instance_running = bool(remote_info.payload)
6039 node_current = instance.primary_node
6041 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6042 constants.INSTANCE_REBOOT_HARD]:
6043 for disk in instance.disks:
6044 self.cfg.SetDiskID(disk, node_current)
6045 result = self.rpc.call_instance_reboot(node_current, instance,
6046 reboot_type,
6047 self.op.shutdown_timeout)
6048 result.Raise("Could not reboot instance")
6049 else:
6050 if instance_running:
6051 result = self.rpc.call_instance_shutdown(node_current, instance,
6052 self.op.shutdown_timeout)
6053 result.Raise("Could not shutdown instance for full reboot")
6054 _ShutdownInstanceDisks(self, instance)
6055 else:
6056 self.LogInfo("Instance %s was already stopped, starting now",
6057 instance.name)
6058 _StartInstanceDisks(self, instance, ignore_secondaries)
6059 result = self.rpc.call_instance_start(node_current, instance,
6060 None, None, False)
6061 msg = result.fail_msg
6062 if msg:
6063 _ShutdownInstanceDisks(self, instance)
6064 raise errors.OpExecError("Could not start instance for"
6065 " full reboot: %s" % msg)
6067 self.cfg.MarkInstanceUp(instance.name)
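# Illustrative summary (added): for a running instance,
# INSTANCE_REBOOT_SOFT/HARD delegate to the hypervisor via
# call_instance_reboot; any other type is a full reboot (shutdown, disk
# deactivation/reactivation, start). A hypothetical opcode:
#
#   opcodes.OpInstanceReboot(instance_name="inst1.example.com",
#                            reboot_type=constants.INSTANCE_REBOOT_HARD)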
6070 class LUInstanceShutdown(LogicalUnit):
6071 """Shutdown an instance.
6074 HPATH = "instance-stop"
6075 HTYPE = constants.HTYPE_INSTANCE
6078 def ExpandNames(self):
6079 self._ExpandAndLockInstance()
6081 def BuildHooksEnv(self):
6082 """Build hooks env.
6084 This runs on master, primary and secondary nodes of the instance.
6086 """
6087 env = _BuildInstanceHookEnvByObject(self, self.instance)
6088 env["TIMEOUT"] = self.op.timeout
6090 return env
6091 def BuildHooksNodes(self):
6092 """Build hooks nodes.
6095 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6098 def CheckPrereq(self):
6099 """Check prerequisites.
6101 This checks that the instance is in the cluster.
6104 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6105 assert self.instance is not None, \
6106 "Cannot retrieve locked instance %s" % self.op.instance_name
6108 self.primary_offline = \
6109 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6111 if self.primary_offline and self.op.ignore_offline_nodes:
6112 self.proc.LogWarning("Ignoring offline primary node")
6114 _CheckNodeOnline(self, self.instance.primary_node)
6116 def Exec(self, feedback_fn):
6117 """Shutdown the instance.
6120 instance = self.instance
6121 node_current = instance.primary_node
6122 timeout = self.op.timeout
6124 if not self.op.no_remember:
6125 self.cfg.MarkInstanceDown(instance.name)
6127 if self.primary_offline:
6128 assert self.op.ignore_offline_nodes
6129 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6130 else:
6131 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6132 msg = result.fail_msg
6133 if msg:
6134 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6136 _ShutdownInstanceDisks(self, instance)
6139 class LUInstanceReinstall(LogicalUnit):
6140 """Reinstall an instance.
6143 HPATH = "instance-reinstall"
6144 HTYPE = constants.HTYPE_INSTANCE
6147 def ExpandNames(self):
6148 self._ExpandAndLockInstance()
6150 def BuildHooksEnv(self):
6151 """Build hooks env.
6153 This runs on master, primary and secondary nodes of the instance.
6155 """
6156 return _BuildInstanceHookEnvByObject(self, self.instance)
6158 def BuildHooksNodes(self):
6159 """Build hooks nodes.
6162 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6165 def CheckPrereq(self):
6166 """Check prerequisites.
6168 This checks that the instance is in the cluster and is not running.
6171 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6172 assert instance is not None, \
6173 "Cannot retrieve locked instance %s" % self.op.instance_name
6174 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6175 " offline, cannot reinstall")
6176 for node in instance.secondary_nodes:
6177 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6178 " cannot reinstall")
6180 if instance.disk_template == constants.DT_DISKLESS:
6181 raise errors.OpPrereqError("Instance '%s' has no disks" %
6182 self.op.instance_name,
6183 errors.ECODE_INVAL)
6184 _CheckInstanceDown(self, instance, "cannot reinstall")
6186 if self.op.os_type is not None:
6187 # OS verification
6188 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6189 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6190 instance_os = self.op.os_type
6191 else:
6192 instance_os = instance.os
6194 nodelist = list(instance.all_nodes)
6196 if self.op.osparams:
6197 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6198 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6199 self.os_inst = i_osdict # the new dict (without defaults)
6200 else:
6201 self.os_inst = {}
6203 self.instance = instance
6205 def Exec(self, feedback_fn):
6206 """Reinstall the instance.
6209 inst = self.instance
6211 if self.op.os_type is not None:
6212 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6213 inst.os = self.op.os_type
6214 # Write to configuration
6215 self.cfg.Update(inst, feedback_fn)
6217 _StartInstanceDisks(self, inst, None)
6218 try:
6219 feedback_fn("Running the instance OS create scripts...")
6220 # FIXME: pass debug option from opcode to backend
6221 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6222 self.op.debug_level,
6223 osparams=self.os_inst)
6224 result.Raise("Could not install OS for instance %s on node %s" %
6225 (inst.name, inst.primary_node))
6226 finally:
6227 _ShutdownInstanceDisks(self, inst)
6230 class LUInstanceRecreateDisks(LogicalUnit):
6231 """Recreate an instance's missing disks.
6234 HPATH = "instance-recreate-disks"
6235 HTYPE = constants.HTYPE_INSTANCE
6238 def CheckArguments(self):
6239 # normalise the disk list
6240 self.op.disks = sorted(frozenset(self.op.disks))
6242 def ExpandNames(self):
6243 self._ExpandAndLockInstance()
6244 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6245 if self.op.nodes:
6246 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6247 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6248 else:
6249 self.needed_locks[locking.LEVEL_NODE] = []
6251 def DeclareLocks(self, level):
6252 if level == locking.LEVEL_NODE:
6253 # if we replace the nodes, we only need to lock the old primary,
6254 # otherwise we need to lock all nodes for disk re-creation
6255 primary_only = bool(self.op.nodes)
6256 self._LockInstancesNodes(primary_only=primary_only)
6258 def BuildHooksEnv(self):
6259 """Build hooks env.
6261 This runs on master, primary and secondary nodes of the instance.
6263 """
6264 return _BuildInstanceHookEnvByObject(self, self.instance)
6266 def BuildHooksNodes(self):
6267 """Build hooks nodes.
6270 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6273 def CheckPrereq(self):
6274 """Check prerequisites.
6276 This checks that the instance is in the cluster and is not running.
6279 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6280 assert instance is not None, \
6281 "Cannot retrieve locked instance %s" % self.op.instance_name
6282 if self.op.nodes:
6283 if len(self.op.nodes) != len(instance.all_nodes):
6284 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6285 " %d replacement nodes were specified" %
6286 (instance.name, len(instance.all_nodes),
6287 len(self.op.nodes)),
6288 errors.ECODE_INVAL)
6289 assert instance.disk_template != constants.DT_DRBD8 or \
6290 len(self.op.nodes) == 2
6291 assert instance.disk_template != constants.DT_PLAIN or \
6292 len(self.op.nodes) == 1
6293 primary_node = self.op.nodes[0]
6294 else:
6295 primary_node = instance.primary_node
6296 _CheckNodeOnline(self, primary_node)
6298 if instance.disk_template == constants.DT_DISKLESS:
6299 raise errors.OpPrereqError("Instance '%s' has no disks" %
6300 self.op.instance_name, errors.ECODE_INVAL)
6301 # if we replace nodes *and* the old primary is offline, we don't
6302 # check
6303 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6304 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6305 if not (self.op.nodes and old_pnode.offline):
6306 _CheckInstanceDown(self, instance, "cannot recreate disks")
6308 if not self.op.disks:
6309 self.op.disks = range(len(instance.disks))
6310 else:
6311 for idx in self.op.disks:
6312 if idx >= len(instance.disks):
6313 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6314 errors.ECODE_INVAL)
6315 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6316 raise errors.OpPrereqError("Can't recreate disks partially and"
6317 " change the nodes at the same time",
6319 self.instance = instance
6321 def Exec(self, feedback_fn):
6322 """Recreate the disks.
6325 instance = self.instance
6327 to_skip = []
6328 mods = [] # keeps track of needed logical_id changes
6330 for idx, disk in enumerate(instance.disks):
6331 if idx not in self.op.disks: # disk idx has not been passed in
6332 to_skip.append(idx)
6333 continue
6334 # update secondaries for disks, if needed
6335 if self.op.nodes:
6336 if disk.dev_type == constants.LD_DRBD8:
6337 # need to update the nodes and minors
6338 assert len(self.op.nodes) == 2
6339 assert len(disk.logical_id) == 6 # otherwise disk internals
6340 # have changed
6341 (_, _, old_port, _, _, old_secret) = disk.logical_id
6342 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6343 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6344 new_minors[0], new_minors[1], old_secret)
6345 assert len(disk.logical_id) == len(new_id)
6346 mods.append((idx, new_id))
6348 # now that we have passed all asserts above, we can apply the mods
6349 # in a single run (to avoid partial changes)
6350 for idx, new_id in mods:
6351 instance.disks[idx].logical_id = new_id
6353 # change primary node, if needed
6354 if self.op.nodes:
6355 instance.primary_node = self.op.nodes[0]
6356 self.LogWarning("Changing the instance's nodes, you will have to"
6357 " remove any disks left on the older nodes manually")
6359 if self.op.nodes:
6360 self.cfg.Update(instance, feedback_fn)
6362 _CreateDisks(self, instance, to_skip=to_skip)
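# Illustrative sketch (hypothetical values): for a DRBD8 disk the "mods"
# list above rewrites the 6-tuple logical_id, keeping port and secret:
#
#   old: ("node1", "node2", 11000, 0, 1, "secret")
#   new: ("node3", "node4", 11000, new_minors[0], new_minors[1], "secret")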
6365 class LUInstanceRename(LogicalUnit):
6366 """Rename an instance.
6369 HPATH = "instance-rename"
6370 HTYPE = constants.HTYPE_INSTANCE
6372 def CheckArguments(self):
6373 """Check arguments.
6375 """
6376 if self.op.ip_check and not self.op.name_check:
6377 # TODO: make the ip check more flexible and not depend on the name check
6378 raise errors.OpPrereqError("IP address check requires a name check",
6379 errors.ECODE_INVAL)
6381 def BuildHooksEnv(self):
6382 """Build hooks env.
6384 This runs on master, primary and secondary nodes of the instance.
6386 """
6387 env = _BuildInstanceHookEnvByObject(self, self.instance)
6388 env["INSTANCE_NEW_NAME"] = self.op.new_name
6390 return env
6391 def BuildHooksNodes(self):
6392 """Build hooks nodes.
6395 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6398 def CheckPrereq(self):
6399 """Check prerequisites.
6401 This checks that the instance is in the cluster and is not running.
6404 self.op.instance_name = _ExpandInstanceName(self.cfg,
6405 self.op.instance_name)
6406 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6407 assert instance is not None
6408 _CheckNodeOnline(self, instance.primary_node)
6409 _CheckInstanceDown(self, instance, "cannot rename")
6410 self.instance = instance
6412 new_name = self.op.new_name
6413 if self.op.name_check:
6414 hostname = netutils.GetHostname(name=new_name)
6415 if hostname != new_name:
6416 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6417 hostname.name)
6418 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6419 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6420 " same as given hostname '%s'") %
6421 (hostname.name, self.op.new_name),
6422 errors.ECODE_INVAL)
6423 new_name = self.op.new_name = hostname.name
6424 if (self.op.ip_check and
6425 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6426 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6427 (hostname.ip, new_name),
6428 errors.ECODE_NOTUNIQUE)
6430 instance_list = self.cfg.GetInstanceList()
6431 if new_name in instance_list and new_name != instance.name:
6432 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6433 new_name, errors.ECODE_EXISTS)
6435 def Exec(self, feedback_fn):
6436 """Rename the instance.
6439 inst = self.instance
6440 old_name = inst.name
6442 rename_file_storage = False
6443 if (inst.disk_template in constants.DTS_FILEBASED and
6444 self.op.new_name != inst.name):
6445 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6446 rename_file_storage = True
6448 self.cfg.RenameInstance(inst.name, self.op.new_name)
6449 # Change the instance lock. This is definitely safe while we hold the BGL.
6450 # Otherwise the new lock would have to be added in acquired mode.
6452 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6453 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6455 # re-read the instance from the configuration after rename
6456 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6458 if rename_file_storage:
6459 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6460 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6461 old_file_storage_dir,
6462 new_file_storage_dir)
6463 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6464 " (but the instance has been renamed in Ganeti)" %
6465 (inst.primary_node, old_file_storage_dir,
6466 new_file_storage_dir))
6468 _StartInstanceDisks(self, inst, None)
6469 try:
6470 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6471 old_name, self.op.debug_level)
6472 msg = result.fail_msg
6473 if msg:
6474 msg = ("Could not run OS rename script for instance %s on node %s"
6475 " (but the instance has been renamed in Ganeti): %s" %
6476 (inst.name, inst.primary_node, msg))
6477 self.proc.LogWarning(msg)
6478 finally:
6479 _ShutdownInstanceDisks(self, inst)
6481 return inst.name
6484 class LUInstanceRemove(LogicalUnit):
6485 """Remove an instance.
6488 HPATH = "instance-remove"
6489 HTYPE = constants.HTYPE_INSTANCE
6492 def ExpandNames(self):
6493 self._ExpandAndLockInstance()
6494 self.needed_locks[locking.LEVEL_NODE] = []
6495 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6497 def DeclareLocks(self, level):
6498 if level == locking.LEVEL_NODE:
6499 self._LockInstancesNodes()
6501 def BuildHooksEnv(self):
6502 """Build hooks env.
6504 This runs on master, primary and secondary nodes of the instance.
6506 """
6507 env = _BuildInstanceHookEnvByObject(self, self.instance)
6508 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6510 return env
6511 def BuildHooksNodes(self):
6512 """Build hooks nodes.
6515 nl = [self.cfg.GetMasterNode()]
6516 nl_post = list(self.instance.all_nodes) + nl
6517 return (nl, nl_post)
6519 def CheckPrereq(self):
6520 """Check prerequisites.
6522 This checks that the instance is in the cluster.
6525 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6526 assert self.instance is not None, \
6527 "Cannot retrieve locked instance %s" % self.op.instance_name
6529 def Exec(self, feedback_fn):
6530 """Remove the instance.
6533 instance = self.instance
6534 logging.info("Shutting down instance %s on node %s",
6535 instance.name, instance.primary_node)
6537 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6538 self.op.shutdown_timeout)
6539 msg = result.fail_msg
6540 if msg:
6541 if self.op.ignore_failures:
6542 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6543 else:
6544 raise errors.OpExecError("Could not shutdown instance %s on"
6545 " node %s: %s" %
6546 (instance.name, instance.primary_node, msg))
6548 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6551 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6552 """Utility function to remove an instance.
6555 logging.info("Removing block devices for instance %s", instance.name)
6557 if not _RemoveDisks(lu, instance):
6558 if not ignore_failures:
6559 raise errors.OpExecError("Can't remove instance's disks")
6560 feedback_fn("Warning: can't remove instance's disks")
6562 logging.info("Removing instance %s out of cluster config", instance.name)
6564 lu.cfg.RemoveInstance(instance.name)
6566 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6567 "Instance lock removal conflict"
6569 # Remove lock for the instance
6570 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6573 class LUInstanceQuery(NoHooksLU):
6574 """Logical unit for querying instances.
6577 # pylint: disable=W0142
6580 def CheckArguments(self):
6581 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6582 self.op.output_fields, self.op.use_locking)
6584 def ExpandNames(self):
6585 self.iq.ExpandNames(self)
6587 def DeclareLocks(self, level):
6588 self.iq.DeclareLocks(self, level)
6590 def Exec(self, feedback_fn):
6591 return self.iq.OldStyleQuery(self)
6594 class LUInstanceFailover(LogicalUnit):
6595 """Failover an instance.
6598 HPATH = "instance-failover"
6599 HTYPE = constants.HTYPE_INSTANCE
6602 def CheckArguments(self):
6603 """Check the arguments.
6606 self.iallocator = getattr(self.op, "iallocator", None)
6607 self.target_node = getattr(self.op, "target_node", None)
6609 def ExpandNames(self):
6610 self._ExpandAndLockInstance()
6612 if self.op.target_node is not None:
6613 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6615 self.needed_locks[locking.LEVEL_NODE] = []
6616 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6618 ignore_consistency = self.op.ignore_consistency
6619 shutdown_timeout = self.op.shutdown_timeout
6620 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6621 cleanup=False,
6622 failover=True,
6623 ignore_consistency=ignore_consistency,
6624 shutdown_timeout=shutdown_timeout)
6625 self.tasklets = [self._migrater]
6627 def DeclareLocks(self, level):
6628 if level == locking.LEVEL_NODE:
6629 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6630 if instance.disk_template in constants.DTS_EXT_MIRROR:
6631 if self.op.target_node is None:
6632 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6633 else:
6634 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6635 self.op.target_node]
6636 del self.recalculate_locks[locking.LEVEL_NODE]
6637 else:
6638 self._LockInstancesNodes()
6640 def BuildHooksEnv(self):
6641 """Build hooks env.
6643 This runs on master, primary and secondary nodes of the instance.
6645 """
6646 instance = self._migrater.instance
6647 source_node = instance.primary_node
6648 target_node = self.op.target_node
6649 env = {
6650 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6651 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6652 "OLD_PRIMARY": source_node,
6653 "NEW_PRIMARY": target_node,
6654 }
6656 if instance.disk_template in constants.DTS_INT_MIRROR:
6657 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6658 env["NEW_SECONDARY"] = source_node
6660 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6662 env.update(_BuildInstanceHookEnvByObject(self, instance))
6666 def BuildHooksNodes(self):
6667 """Build hooks nodes.
6670 instance = self._migrater.instance
6671 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6672 return (nl, nl + [instance.primary_node])
6675 class LUInstanceMigrate(LogicalUnit):
6676 """Migrate an instance.
6678 This is migration without shutting down, compared to the failover,
6679 which is done with shutdown.
6681 """
6682 HPATH = "instance-migrate"
6683 HTYPE = constants.HTYPE_INSTANCE
6684 REQ_BGL = False
6686 def ExpandNames(self):
6687 self._ExpandAndLockInstance()
6689 if self.op.target_node is not None:
6690 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6692 self.needed_locks[locking.LEVEL_NODE] = []
6693 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6695 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6696 cleanup=self.op.cleanup,
6697 failover=False,
6698 fallback=self.op.allow_failover)
6699 self.tasklets = [self._migrater]
6701 def DeclareLocks(self, level):
6702 if level == locking.LEVEL_NODE:
6703 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6704 if instance.disk_template in constants.DTS_EXT_MIRROR:
6705 if self.op.target_node is None:
6706 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6707 else:
6708 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6709 self.op.target_node]
6710 del self.recalculate_locks[locking.LEVEL_NODE]
6711 else:
6712 self._LockInstancesNodes()
6714 def BuildHooksEnv(self):
6715 """Build hooks env.
6717 This runs on master, primary and secondary nodes of the instance.
6719 """
6720 instance = self._migrater.instance
6721 source_node = instance.primary_node
6722 target_node = self.op.target_node
6723 env = _BuildInstanceHookEnvByObject(self, instance)
6724 env.update({
6725 "MIGRATE_LIVE": self._migrater.live,
6726 "MIGRATE_CLEANUP": self.op.cleanup,
6727 "OLD_PRIMARY": source_node,
6728 "NEW_PRIMARY": target_node,
6729 })
6731 if instance.disk_template in constants.DTS_INT_MIRROR:
6732 env["OLD_SECONDARY"] = target_node
6733 env["NEW_SECONDARY"] = source_node
6735 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6739 def BuildHooksNodes(self):
6740 """Build hooks nodes.
6743 instance = self._migrater.instance
6744 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6745 return (nl, nl + [instance.primary_node])
6748 class LUInstanceMove(LogicalUnit):
6749 """Move an instance by data-copying.
6752 HPATH = "instance-move"
6753 HTYPE = constants.HTYPE_INSTANCE
6756 def ExpandNames(self):
6757 self._ExpandAndLockInstance()
6758 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6759 self.op.target_node = target_node
6760 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6761 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6763 def DeclareLocks(self, level):
6764 if level == locking.LEVEL_NODE:
6765 self._LockInstancesNodes(primary_only=True)
6767 def BuildHooksEnv(self):
6768 """Build hooks env.
6770 This runs on master, primary and secondary nodes of the instance.
6772 """
6773 env = {
6774 "TARGET_NODE": self.op.target_node,
6775 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6776 }
6777 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6778 return env
6780 def BuildHooksNodes(self):
6781 """Build hooks nodes.
6785 self.cfg.GetMasterNode(),
6786 self.instance.primary_node,
6787 self.op.target_node,
6791 def CheckPrereq(self):
6792 """Check prerequisites.
6794 This checks that the instance is in the cluster.
6797 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6798 assert self.instance is not None, \
6799 "Cannot retrieve locked instance %s" % self.op.instance_name
6801 node = self.cfg.GetNodeInfo(self.op.target_node)
6802 assert node is not None, \
6803 "Cannot retrieve locked node %s" % self.op.target_node
6805 self.target_node = target_node = node.name
6807 if target_node == instance.primary_node:
6808 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6809 (instance.name, target_node),
6810 errors.ECODE_STATE)
6812 bep = self.cfg.GetClusterInfo().FillBE(instance)
6814 for idx, dsk in enumerate(instance.disks):
6815 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6816 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6817 " cannot copy" % idx, errors.ECODE_STATE)
6819 _CheckNodeOnline(self, target_node)
6820 _CheckNodeNotDrained(self, target_node)
6821 _CheckNodeVmCapable(self, target_node)
6823 if instance.admin_up:
6824 # check memory requirements on the secondary node
6825 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6826 instance.name, bep[constants.BE_MEMORY],
6827 instance.hypervisor)
6828 else:
6829 self.LogInfo("Not checking memory on the secondary node as"
6830 " instance will not be started")
6832 # check bridge existence
6833 _CheckInstanceBridgesExist(self, instance, node=target_node)
6835 def Exec(self, feedback_fn):
6836 """Move an instance.
6838 The move is done by shutting it down on its present node, copying
6839 the data over (slow) and starting it on the new node.
6841 """
6842 instance = self.instance
6844 source_node = instance.primary_node
6845 target_node = self.target_node
6847 self.LogInfo("Shutting down instance %s on source node %s",
6848 instance.name, source_node)
6850 result = self.rpc.call_instance_shutdown(source_node, instance,
6851 self.op.shutdown_timeout)
6852 msg = result.fail_msg
6853 if msg:
6854 if self.op.ignore_consistency:
6855 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6856 " Proceeding anyway. Please make sure node"
6857 " %s is down. Error details: %s",
6858 instance.name, source_node, source_node, msg)
6859 else:
6860 raise errors.OpExecError("Could not shutdown instance %s on"
6861 " node %s: %s" %
6862 (instance.name, source_node, msg))
6864 # create the target disks
6865 try:
6866 _CreateDisks(self, instance, target_node=target_node)
6867 except errors.OpExecError:
6868 self.LogWarning("Device creation failed, reverting...")
6869 try:
6870 _RemoveDisks(self, instance, target_node=target_node)
6871 finally:
6872 self.cfg.ReleaseDRBDMinors(instance.name)
6873 raise
6875 cluster_name = self.cfg.GetClusterInfo().cluster_name
6877 errs = []
6878 # activate, get path, copy the data over
6879 for idx, disk in enumerate(instance.disks):
6880 self.LogInfo("Copying data for disk %d", idx)
6881 result = self.rpc.call_blockdev_assemble(target_node, disk,
6882 instance.name, True, idx)
6883 if result.fail_msg:
6884 self.LogWarning("Can't assemble newly created disk %d: %s",
6885 idx, result.fail_msg)
6886 errs.append(result.fail_msg)
6887 break
6888 dev_path = result.payload
6889 result = self.rpc.call_blockdev_export(source_node, disk,
6890 target_node, dev_path,
6891 cluster_name)
6892 if result.fail_msg:
6893 self.LogWarning("Can't copy data over for disk %d: %s",
6894 idx, result.fail_msg)
6895 errs.append(result.fail_msg)
6896 break
6898 if errs:
6899 self.LogWarning("Some disks failed to copy, aborting")
6900 try:
6901 _RemoveDisks(self, instance, target_node=target_node)
6902 finally:
6903 self.cfg.ReleaseDRBDMinors(instance.name)
6904 raise errors.OpExecError("Errors during disk copy: %s" %
6905 (",".join(errs),))
6907 instance.primary_node = target_node
6908 self.cfg.Update(instance, feedback_fn)
6910 self.LogInfo("Removing the disks on the original node")
6911 _RemoveDisks(self, instance, target_node=source_node)
6913 # Only start the instance if it's marked as up
6914 if instance.admin_up:
6915 self.LogInfo("Starting instance %s on node %s",
6916 instance.name, target_node)
6918 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6919 ignore_secondaries=True)
6920 if not disks_ok:
6921 _ShutdownInstanceDisks(self, instance)
6922 raise errors.OpExecError("Can't activate the instance's disks")
6924 result = self.rpc.call_instance_start(target_node, instance,
6925 None, None, False)
6926 msg = result.fail_msg
6927 if msg:
6928 _ShutdownInstanceDisks(self, instance)
6929 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6930 (instance.name, target_node, msg))
6933 class LUNodeMigrate(LogicalUnit):
6934 """Migrate all instances from a node.
6937 HPATH = "node-migrate"
6938 HTYPE = constants.HTYPE_NODE
6941 def CheckArguments(self):
6942 pass
6944 def ExpandNames(self):
6945 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6947 self.share_locks = _ShareAll()
6948 self.needed_locks = {
6949 locking.LEVEL_NODE: [self.op.node_name],
6950 }
6952 def BuildHooksEnv(self):
6953 """Build hooks env.
6955 This runs on the master, the primary and all the secondaries.
6957 """
6958 return {
6959 "NODE_NAME": self.op.node_name,
6960 }
6962 def BuildHooksNodes(self):
6963 """Build hooks nodes.
6966 nl = [self.cfg.GetMasterNode()]
6969 def CheckPrereq(self):
6970 pass
6972 def Exec(self, feedback_fn):
6973 # Prepare jobs for migration instances
6974 jobs = [
6975 [opcodes.OpInstanceMigrate(instance_name=inst.name,
6976 mode=self.op.mode,
6977 live=self.op.live,
6978 iallocator=self.op.iallocator,
6979 target_node=self.op.target_node)]
6980 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
6981 ]
6983 # TODO: Run iallocator in this opcode and pass correct placement options to
6984 # OpInstanceMigrate. Since other jobs can modify the cluster between
6985 # running the iallocator and the actual migration, a good consistency model
6986 # will have to be found.
6988 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
6989 frozenset([self.op.node_name]))
6991 return ResultWithJobs(jobs)
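# Illustrative note (added): "jobs" above is a list of single-opcode lists,
# one per primary instance, so every migration is submitted as its own job;
# two instances would yield:
#
#   [[OpInstanceMigrate(...)], [OpInstanceMigrate(...)]]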
6994 class TLMigrateInstance(Tasklet):
6995 """Tasklet class for instance migration.
6998 @ivar live: whether the migration will be done live or non-live;
6999 this variable is initalized only after CheckPrereq has run
7000 @type cleanup: boolean
7001 @ivar cleanup: Whether we cleanup from a failed migration
7002 @type iallocator: string
7003 @ivar iallocator: The iallocator used to determine target_node
7004 @type target_node: string
7005 @ivar target_node: If given, the target_node to reallocate the instance to
7006 @type failover: boolean
7007 @ivar failover: Whether operation results in failover or migration
7008 @type fallback: boolean
7009 @ivar fallback: Whether fallback to failover is allowed if migration not
7010 possible
7011 @type ignore_consistency: boolean
7012 @ivar ignore_consistency: Whether we should ignore consistency between source
7013 and target node
7014 @type shutdown_timeout: int
7015 @ivar shutdown_timeout: In case of failover timeout of the shutdown
7017 """
7018 def __init__(self, lu, instance_name, cleanup=False,
7019 failover=False, fallback=False,
7020 ignore_consistency=False,
7021 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7022 """Initializes this class.
7025 Tasklet.__init__(self, lu)
7028 self.instance_name = instance_name
7029 self.cleanup = cleanup
7030 self.live = False # will be overridden later
7031 self.failover = failover
7032 self.fallback = fallback
7033 self.ignore_consistency = ignore_consistency
7034 self.shutdown_timeout = shutdown_timeout
7036 def CheckPrereq(self):
7037 """Check prerequisites.
7039 This checks that the instance is in the cluster.
7041 """
7042 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7043 instance = self.cfg.GetInstanceInfo(instance_name)
7044 assert instance is not None
7045 self.instance = instance
7047 if (not self.cleanup and not instance.admin_up and not self.failover and
7048 self.fallback):
7049 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7050 " to failover")
7051 self.failover = True
7053 if instance.disk_template not in constants.DTS_MIRRORED:
7054 if self.failover:
7055 text = "failovers"
7056 else:
7057 text = "migrations"
7058 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7059 " %s" % (instance.disk_template, text),
7060 errors.ECODE_STATE)
7062 if instance.disk_template in constants.DTS_EXT_MIRROR:
7063 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7065 if self.lu.op.iallocator:
7066 self._RunAllocator()
7067 else:
7068 # We set self.target_node as it is required by
7069 # BuildHooksEnv
7070 self.target_node = self.lu.op.target_node
7072 # self.target_node is already populated, either directly or by the
7073 # iallocator run
7074 target_node = self.target_node
7075 if self.target_node == instance.primary_node:
7076 raise errors.OpPrereqError("Cannot migrate instance %s"
7077 " to its primary (%s)" %
7078 (instance.name, instance.primary_node))
7080 if len(self.lu.tasklets) == 1:
7081 # It is safe to release locks only when we're the only tasklet
7082 # in the LU
7083 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7084 keep=[instance.primary_node, self.target_node])
7086 else:
7087 secondary_nodes = instance.secondary_nodes
7088 if not secondary_nodes:
7089 raise errors.ConfigurationError("No secondary node but using"
7090 " %s disk template" %
7091 instance.disk_template)
7092 target_node = secondary_nodes[0]
7093 if self.lu.op.iallocator or (self.lu.op.target_node and
7094 self.lu.op.target_node != target_node):
7095 if self.failover:
7096 text = "failed over"
7097 else:
7098 text = "migrated"
7099 raise errors.OpPrereqError("Instances with disk template %s cannot"
7100 " be %s to arbitrary nodes"
7101 " (neither an iallocator nor a target"
7102 " node can be passed)" %
7103 (instance.disk_template, text),
7104 errors.ECODE_INVAL)
7106 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7108 # check memory requirements on the secondary node
7109 if not self.failover or instance.admin_up:
7110 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7111 instance.name, i_be[constants.BE_MEMORY],
7112 instance.hypervisor)
7113 else:
7114 self.lu.LogInfo("Not checking memory on the secondary node as"
7115 " instance will not be started")
7117 # check bridge existence
7118 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7120 if not self.cleanup:
7121 _CheckNodeNotDrained(self.lu, target_node)
7122 if not self.failover:
7123 result = self.rpc.call_instance_migratable(instance.primary_node,
7124 instance)
7125 if result.fail_msg and self.fallback:
7126 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7127 " failover")
7128 self.failover = True
7129 else:
7130 result.Raise("Can't migrate, please use failover",
7131 prereq=True, ecode=errors.ECODE_STATE)
7133 assert not (self.failover and self.cleanup)
7135 if not self.failover:
7136 if self.lu.op.live is not None and self.lu.op.mode is not None:
7137 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7138 " parameters are accepted",
7140 if self.lu.op.live is not None:
7142 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7144 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7145 # reset the 'live' parameter to None so that repeated
7146 # invocations of CheckPrereq do not raise an exception
7147 self.lu.op.live = None
7148 elif self.lu.op.mode is None:
7149 # read the default value from the hypervisor
7150 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7151 skip_globals=False)
7152 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7154 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7155 else:
7156 # Failover is never live
7157 self.live = False
7159 def _RunAllocator(self):
7160 """Run the allocator based on input opcode.
7163 ial = IAllocator(self.cfg, self.rpc,
7164 mode=constants.IALLOCATOR_MODE_RELOC,
7165 name=self.instance_name,
7166 # TODO See why hail breaks with a single node below
7167 relocate_from=[self.instance.primary_node,
7168 self.instance.primary_node],
7169 )
7171 ial.Run(self.lu.op.iallocator)
7173 if not ial.success:
7174 raise errors.OpPrereqError("Can't compute nodes using"
7175 " iallocator '%s': %s" %
7176 (self.lu.op.iallocator, ial.info),
7177 errors.ECODE_NORES)
7178 if len(ial.result) != ial.required_nodes:
7179 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7180 " of nodes (%s), required %s" %
7181 (self.lu.op.iallocator, len(ial.result),
7182 ial.required_nodes), errors.ECODE_FAULT)
7183 self.target_node = ial.result[0]
7184 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7185 self.instance_name, self.lu.op.iallocator,
7186 utils.CommaJoin(ial.result))
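# Illustrative sketch (hypothetical values): in IALLOCATOR_MODE_RELOC the
# allocator returns the new node list; for a single-node relocation a
# valid run would look like:
#
#   ial.success == True
#   ial.required_nodes == 1
#   ial.result == ["node3.example.com"]  # becomes self.target_node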
7188 def _WaitUntilSync(self):
7189 """Poll with custom rpc for disk sync.
7191 This uses our own step-based rpc call.
7193 """
7194 self.feedback_fn("* wait until resync is done")
7195 all_done = False
7196 while not all_done:
7197 all_done = True
7198 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7199 self.nodes_ip,
7200 self.instance.disks)
7201 min_percent = 100
7202 for node, nres in result.items():
7203 nres.Raise("Cannot resync disks on node %s" % node)
7204 node_done, node_percent = nres.payload
7205 all_done = all_done and node_done
7206 if node_percent is not None:
7207 min_percent = min(min_percent, node_percent)
7208 if not all_done:
7209 if min_percent < 100:
7210 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7211 time.sleep(2)
7213 def _EnsureSecondary(self, node):
7214 """Demote a node to secondary.
7217 self.feedback_fn("* switching node %s to secondary mode" % node)
7219 for dev in self.instance.disks:
7220 self.cfg.SetDiskID(dev, node)
7222 result = self.rpc.call_blockdev_close(node, self.instance.name,
7223 self.instance.disks)
7224 result.Raise("Cannot change disk to secondary on node %s" % node)
7226 def _GoStandalone(self):
7227 """Disconnect from the network.
7230 self.feedback_fn("* changing into standalone mode")
7231 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7232 self.instance.disks)
7233 for node, nres in result.items():
7234 nres.Raise("Cannot disconnect disks node %s" % node)
7236 def _GoReconnect(self, multimaster):
7237 """Reconnect to the network.
7243 msg = "single-master"
7244 self.feedback_fn("* changing disks into %s mode" % msg)
7245 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7246 self.instance.disks,
7247 self.instance.name, multimaster)
7248 for node, nres in result.items():
7249 nres.Raise("Cannot change disks config on node %s" % node)
7251 def _ExecCleanup(self):
7252 """Try to cleanup after a failed migration.
7254 The cleanup is done by:
7255 - check that the instance is running only on one node
7256 (and update the config if needed)
7257 - change disks on its secondary node to secondary
7258 - wait until disks are fully synchronized
7259 - disconnect from the network
7260 - change disks into single-master mode
7261 - wait again until disks are fully synchronized
7263 """
7264 instance = self.instance
7265 target_node = self.target_node
7266 source_node = self.source_node
7268 # check running on only one node
7269 self.feedback_fn("* checking where the instance actually runs"
7270 " (if this hangs, the hypervisor might be in"
7272 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7273 for node, result in ins_l.items():
7274 result.Raise("Can't contact node %s" % node)
7276 runningon_source = instance.name in ins_l[source_node].payload
7277 runningon_target = instance.name in ins_l[target_node].payload
7279 if runningon_source and runningon_target:
7280 raise errors.OpExecError("Instance seems to be running on two nodes,"
7281 " or the hypervisor is confused; you will have"
7282 " to ensure manually that it runs only on one"
7283 " and restart this operation")
7285 if not (runningon_source or runningon_target):
7286 raise errors.OpExecError("Instance does not seem to be running at all;"
7287 " in this case it's safer to repair by"
7288 " running 'gnt-instance stop' to ensure disk"
7289 " shutdown, and then restarting it")
7291 if runningon_target:
7292 # the migration has actually succeeded, we need to update the config
7293 self.feedback_fn("* instance running on secondary node (%s),"
7294 " updating config" % target_node)
7295 instance.primary_node = target_node
7296 self.cfg.Update(instance, self.feedback_fn)
7297 demoted_node = source_node
7298 else:
7299 self.feedback_fn("* instance confirmed to be running on its"
7300 " primary node (%s)" % source_node)
7301 demoted_node = target_node
7303 if instance.disk_template in constants.DTS_INT_MIRROR:
7304 self._EnsureSecondary(demoted_node)
7305 try:
7306 self._WaitUntilSync()
7307 except errors.OpExecError:
7308 # we ignore errors here, since if the device is standalone, it
7309 # won't be able to sync
7310 pass
7311 self._GoStandalone()
7312 self._GoReconnect(False)
7313 self._WaitUntilSync()
7312 self._GoReconnect(False)
7313 self._WaitUntilSync()
7315 self.feedback_fn("* done")
7317 def _RevertDiskStatus(self):
7318 """Try to revert the disk status after a failed migration.
7321 target_node = self.target_node
7322 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7326 self._EnsureSecondary(target_node)
7327 self._GoStandalone()
7328 self._GoReconnect(False)
7329 self._WaitUntilSync()
7330 except errors.OpExecError, err:
7331 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7332 " please try to recover the instance manually;"
7333 " error '%s'" % str(err))
7335 def _AbortMigration(self):
7336 """Call the hypervisor code to abort a started migration.
7339 instance = self.instance
7340 target_node = self.target_node
7341 migration_info = self.migration_info
7343 abort_result = self.rpc.call_finalize_migration(target_node,
7344 instance,
7345 migration_info,
7346 False)
7347 abort_msg = abort_result.fail_msg
7348 if abort_msg:
7349 logging.error("Aborting migration failed on target node %s: %s",
7350 target_node, abort_msg)
7351 # Don't raise an exception here, as we still have to try to revert the
7352 # disk status, even if this step failed.
7354 def _ExecMigration(self):
7355 """Migrate an instance.
7357 The migration is done by:
7358 - change the disks into dual-master mode
7359 - wait until disks are fully synchronized again
7360 - migrate the instance
7361 - change disks on the new secondary node (the old primary) to secondary
7362 - wait until disks are fully synchronized
7363 - change disks into single-master mode
7365 """
7366 instance = self.instance
7367 target_node = self.target_node
7368 source_node = self.source_node
7370 self.feedback_fn("* checking disk consistency between source and target")
7371 for dev in instance.disks:
7372 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7373 raise errors.OpExecError("Disk %s is degraded or not fully"
7374 " synchronized on target node,"
7375 " aborting migration" % dev.iv_name)
7377 # First get the migration information from the remote node
7378 result = self.rpc.call_migration_info(source_node, instance)
7379 msg = result.fail_msg
7380 if msg:
7381 log_err = ("Failed fetching source migration information from %s: %s" %
7382 (source_node, msg))
7383 logging.error(log_err)
7384 raise errors.OpExecError(log_err)
7386 self.migration_info = migration_info = result.payload
7388 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7389 # Then switch the disks to master/master mode
7390 self._EnsureSecondary(target_node)
7391 self._GoStandalone()
7392 self._GoReconnect(True)
7393 self._WaitUntilSync()
7395 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7396 result = self.rpc.call_accept_instance(target_node,
7397 instance,
7398 migration_info,
7399 self.nodes_ip[target_node])
7401 msg = result.fail_msg
7402 if msg:
7403 logging.error("Instance pre-migration failed, trying to revert"
7404 " disk status: %s", msg)
7405 self.feedback_fn("Pre-migration failed, aborting")
7406 self._AbortMigration()
7407 self._RevertDiskStatus()
7408 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7409 (instance.name, msg))
7411 self.feedback_fn("* migrating instance to %s" % target_node)
7412 result = self.rpc.call_instance_migrate(source_node, instance,
7413 self.nodes_ip[target_node],
7414 self.live)
7415 msg = result.fail_msg
7416 if msg:
7417 logging.error("Instance migration failed, trying to revert"
7418 " disk status: %s", msg)
7419 self.feedback_fn("Migration failed, aborting")
7420 self._AbortMigration()
7421 self._RevertDiskStatus()
7422 raise errors.OpExecError("Could not migrate instance %s: %s" %
7423 (instance.name, msg))
7425 instance.primary_node = target_node
7426 # distribute new instance config to the other nodes
7427 self.cfg.Update(instance, self.feedback_fn)
7429 result = self.rpc.call_finalize_migration(target_node,
7430 instance,
7431 migration_info,
7432 True)
7433 msg = result.fail_msg
7434 if msg:
7435 logging.error("Instance migration succeeded, but finalization failed:"
7436 " %s", msg)
7437 raise errors.OpExecError("Could not finalize instance migration: %s" %
7438 msg)
7440 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7441 self._EnsureSecondary(source_node)
7442 self._WaitUntilSync()
7443 self._GoStandalone()
7444 self._GoReconnect(False)
7445 self._WaitUntilSync()
7447 self.feedback_fn("* done")
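# Illustrative summary (added) of the disk state transitions driven above
# for internally mirrored (DRBD) templates:
#
#   _EnsureSecondary(target) -> _GoStandalone() -> _GoReconnect(True)
#   [dual-master during the copy] -> migrate -> _EnsureSecondary(source)
#   -> _GoStandalone() -> _GoReconnect(False)  [back to single-master]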
7449 def _ExecFailover(self):
7450 """Failover an instance.
7452 The failover is done by shutting it down on its present node and
7453 starting it on the secondary.
7455 """
7456 instance = self.instance
7457 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7459 source_node = instance.primary_node
7460 target_node = self.target_node
7462 if instance.admin_up:
7463 self.feedback_fn("* checking disk consistency between source and target")
7464 for dev in instance.disks:
7465 # for drbd, these are drbd over lvm
7466 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7467 if primary_node.offline:
7468 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7469 " target node %s" %
7470 (primary_node.name, dev.iv_name, target_node))
7471 elif not self.ignore_consistency:
7472 raise errors.OpExecError("Disk %s is degraded on target node,"
7473 " aborting failover" % dev.iv_name)
7475 self.feedback_fn("* not checking disk consistency as instance is not"
7478 self.feedback_fn("* shutting down instance on source node")
7479 logging.info("Shutting down instance %s on node %s",
7480 instance.name, source_node)
7482 result = self.rpc.call_instance_shutdown(source_node, instance,
7483 self.shutdown_timeout)
7484 msg = result.fail_msg
7485 if msg:
7486 if self.ignore_consistency or primary_node.offline:
7487 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7488 " proceeding anyway; please make sure node"
7489 " %s is down; error details: %s",
7490 instance.name, source_node, source_node, msg)
7491 else:
7492 raise errors.OpExecError("Could not shutdown instance %s on"
7493 " node %s: %s" %
7494 (instance.name, source_node, msg))
7496 self.feedback_fn("* deactivating the instance's disks on source node")
7497 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7498 raise errors.OpExecError("Can't shut down the instance's disks")
7500 instance.primary_node = target_node
7501 # distribute new instance config to the other nodes
7502 self.cfg.Update(instance, self.feedback_fn)
7504 # Only start the instance if it's marked as up
7505 if instance.admin_up:
7506 self.feedback_fn("* activating the instance's disks on target node %s" %
7507 target_node)
7508 logging.info("Starting instance %s on node %s",
7509 instance.name, target_node)
7511 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7512 ignore_secondaries=True)
7513 if not disks_ok:
7514 _ShutdownInstanceDisks(self.lu, instance)
7515 raise errors.OpExecError("Can't activate the instance's disks")
7517 self.feedback_fn("* starting the instance on the target node %s" %
7518 target_node)
7519 result = self.rpc.call_instance_start(target_node, instance, None, None,
7520 False)
7521 msg = result.fail_msg
7522 if msg:
7523 _ShutdownInstanceDisks(self.lu, instance)
7524 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7525 (instance.name, target_node, msg))
7527 def Exec(self, feedback_fn):
7528 """Perform the migration.
7531 self.feedback_fn = feedback_fn
7532 self.source_node = self.instance.primary_node
7534 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7535 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7536 self.target_node = self.instance.secondary_nodes[0]
7537 # Otherwise self.target_node has been populated either
7538 # directly, or through an iallocator.
7540 self.all_nodes = [self.source_node, self.target_node]
7541 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7542 in self.cfg.GetMultiNodeInfo(self.all_nodes))
7544 if self.failover:
7545 feedback_fn("Failover instance %s" % self.instance.name)
7546 self._ExecFailover()
7547 else:
7548 feedback_fn("Migrating instance %s" % self.instance.name)
7550 if self.cleanup:
7551 return self._ExecCleanup()
7552 else:
7553 return self._ExecMigration()
7556 def _CreateBlockDev(lu, node, instance, device, force_create,
7557 info, force_open):
7558 """Create a tree of block devices on a given node.
7560 If this device type has to be created on secondaries, create it and
7561 all its children.
7563 If not, just recurse to children keeping the same 'force' value.
7565 @param lu: the lu on whose behalf we execute
7566 @param node: the node on which to create the device
7567 @type instance: L{objects.Instance}
7568 @param instance: the instance which owns the device
7569 @type device: L{objects.Disk}
7570 @param device: the device to create
7571 @type force_create: boolean
7572 @param force_create: whether to force creation of this device; this
7573 will be changed to True whenever we find a device which has the
7574 CreateOnSecondary() attribute
7575 @param info: the extra 'metadata' we should attach to the device
7576 (this will be represented as a LVM tag)
7577 @type force_open: boolean
7578 @param force_open: this parameter will be passed to the
7579 L{backend.BlockdevCreate} function where it specifies
7580 whether we run on primary or not, and it affects both
7581 the child assembly and the device's own Open() execution
7584 if device.CreateOnSecondary():
7588 for child in device.children:
7589 _CreateBlockDev(lu, node, instance, child, force_create,
7592 if not force_create:
7595 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
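# Illustrative sketch only (hypothetical, not used by the module): the
# recursion above is a pre-order tree walk in which the force flag, once
# switched on by a device that reports CreateOnSecondary(), stays on for the
# whole subtree. The _FakeDev class below is a stand-in for L{objects.Disk}.
def _ExampleForceCreatePropagation():
  """Demonstrates how force_create propagates in L{_CreateBlockDev}."""
  class _FakeDev:
    def __init__(self, create_on_sec, children=None):
      self.children = children or []
      self._cos = create_on_sec

    def CreateOnSecondary(self):
      return self._cos

  created = []

  def _Walk(dev, force):
    if dev.CreateOnSecondary():
      force = True
    for child in dev.children:
      _Walk(child, force)
    if force:
      created.append(dev)

  # a DRBD-like parent forces creation of its two LV children
  _Walk(_FakeDev(True, [_FakeDev(False), _FakeDev(False)]), False)
  return len(created)  # -> 3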
7598 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7599 """Create a single block device on a given node.
7601 This will not recurse over children of the device, so they must be created in advance.
7604 @param lu: the lu on whose behalf we execute
7605 @param node: the node on which to create the device
7606 @type instance: L{objects.Instance}
7607 @param instance: the instance which owns the device
7608 @type device: L{objects.Disk}
7609 @param device: the device to create
7610 @param info: the extra 'metadata' we should attach to the device
7611 (this will be represented as a LVM tag)
7612 @type force_open: boolean
7613 @param force_open: this parameter will be passed to the
7614 L{backend.BlockdevCreate} function where it specifies
7615 whether we run on primary or not, and it affects both
7616 the child assembly and the device's own Open() execution
7619 lu.cfg.SetDiskID(device, node)
7620 result = lu.rpc.call_blockdev_create(node, device, device.size,
7621 instance.name, force_open, info)
7622 result.Raise("Can't create block device %s on"
7623 " node %s for instance %s" % (device, node, instance.name))
7624 if device.physical_id is None:
7625 device.physical_id = result.payload
7628 def _GenerateUniqueNames(lu, exts):
7629 """Generate a suitable LV name.
7631 This will generate a logical volume name for the given instance.
7636 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7637 results.append("%s%s" % (new_id, val))
7641 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7642 iv_name, p_minor, s_minor):
7643 """Generate a drbd8 device complete with its children.
7646 assert len(vgnames) == len(names) == 2
7647 port = lu.cfg.AllocatePort()
7648 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7649 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7650 logical_id=(vgnames[0], names[0]))
7651 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7652 logical_id=(vgnames[1], names[1]))
7653 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7654 logical_id=(primary, secondary, port,
7657 children=[dev_data, dev_meta],
7662 def _GenerateDiskTemplate(lu, template_name,
7663 instance_name, primary_node,
7664 secondary_nodes, disk_info,
7665 file_storage_dir, file_driver,
7666 base_index, feedback_fn):
7667 """Generate the entire disk layout for a given template type.
7670 #TODO: compute space requirements
7672 vgname = lu.cfg.GetVGName()
7673 disk_count = len(disk_info)
7675 if template_name == constants.DT_DISKLESS:
7677 elif template_name == constants.DT_PLAIN:
7678 if len(secondary_nodes) != 0:
7679 raise errors.ProgrammerError("Wrong template configuration")
7681 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7682 for i in range(disk_count)])
7683 for idx, disk in enumerate(disk_info):
7684 disk_index = idx + base_index
7685 vg = disk.get(constants.IDISK_VG, vgname)
7686 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7687 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7688 size=disk[constants.IDISK_SIZE],
7689 logical_id=(vg, names[idx]),
7690 iv_name="disk/%d" % disk_index,
7691 mode=disk[constants.IDISK_MODE])
7692 disks.append(disk_dev)
7693 elif template_name == constants.DT_DRBD8:
7694 if len(secondary_nodes) != 1:
7695 raise errors.ProgrammerError("Wrong template configuration")
7696 remote_node = secondary_nodes[0]
7697 minors = lu.cfg.AllocateDRBDMinor(
7698 [primary_node, remote_node] * len(disk_info), instance_name)
7701 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7702 for i in range(disk_count)]):
7703 names.append(lv_prefix + "_data")
7704 names.append(lv_prefix + "_meta")
7705 for idx, disk in enumerate(disk_info):
7706 disk_index = idx + base_index
7707 data_vg = disk.get(constants.IDISK_VG, vgname)
7708 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7709 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7710 disk[constants.IDISK_SIZE],
7712 names[idx * 2:idx * 2 + 2],
7713 "disk/%d" % disk_index,
7714 minors[idx * 2], minors[idx * 2 + 1])
7715 disk_dev.mode = disk[constants.IDISK_MODE]
7716 disks.append(disk_dev)
7717 elif template_name == constants.DT_FILE:
7718 if len(secondary_nodes) != 0:
7719 raise errors.ProgrammerError("Wrong template configuration")
7721 opcodes.RequireFileStorage()
7723 for idx, disk in enumerate(disk_info):
7724 disk_index = idx + base_index
7725 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7726 size=disk[constants.IDISK_SIZE],
7727 iv_name="disk/%d" % disk_index,
7728 logical_id=(file_driver,
7729 "%s/disk%d" % (file_storage_dir,
7731 mode=disk[constants.IDISK_MODE])
7732 disks.append(disk_dev)
7733 elif template_name == constants.DT_SHARED_FILE:
7734 if len(secondary_nodes) != 0:
7735 raise errors.ProgrammerError("Wrong template configuration")
7737 opcodes.RequireSharedFileStorage()
7739 for idx, disk in enumerate(disk_info):
7740 disk_index = idx + base_index
7741 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7742 size=disk[constants.IDISK_SIZE],
7743 iv_name="disk/%d" % disk_index,
7744 logical_id=(file_driver,
7745 "%s/disk%d" % (file_storage_dir,
7747 mode=disk[constants.IDISK_MODE])
7748 disks.append(disk_dev)
7749 elif template_name == constants.DT_BLOCK:
7750 if len(secondary_nodes) != 0:
7751 raise errors.ProgrammerError("Wrong template configuration")
7753 for idx, disk in enumerate(disk_info):
7754 disk_index = idx + base_index
7755 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7756 size=disk[constants.IDISK_SIZE],
7757 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7758 disk[constants.IDISK_ADOPT]),
7759 iv_name="disk/%d" % disk_index,
7760 mode=disk[constants.IDISK_MODE])
7761 disks.append(disk_dev)
7764 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
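# Hedged illustration (hypothetical values, not called anywhere): the
# C{disk_info} argument above is a list of dicts keyed by the IDISK_*
# constants. For the plain LVM template, two disks could be described as:
def _ExampleDiskInfoForPlainTemplate():
  """Returns a sample C{disk_info} list for L{_GenerateDiskTemplate}."""
  return [
    {constants.IDISK_SIZE: 10240, constants.IDISK_MODE: constants.DISK_RDWR},
    {constants.IDISK_SIZE: 20480, constants.IDISK_MODE: constants.DISK_RDONLY,
     constants.IDISK_VG: "fastvg"},
    ]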
7768 def _GetInstanceInfoText(instance):
7769 """Compute that text that should be added to the disk's metadata.
7772 return "originstname+%s" % instance.name
7775 def _CalcEta(time_taken, written, total_size):
7776 """Calculates the ETA based on size written and total size.
7778 @param time_taken: The time taken so far
7779 @param written: amount written so far
7780 @param total_size: The total size of data to be written
7781 @return: The remaining time in seconds
7784 avg_time = time_taken / float(written)
7785 return (total_size - written) * avg_time
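# Worked example with hypothetical numbers: if 25 GiB of a 100 GiB disk were
# written in 300 seconds, the remaining 75 GiB should take three times what
# was already spent.
def _ExampleCalcEta():
  """Illustrates L{_CalcEta}; returns 900.0 (seconds)."""
  return _CalcEta(300.0, 25 * 1024, 100 * 1024)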
7788 def _WipeDisks(lu, instance):
7789 """Wipes instance disks.
7791 @type lu: L{LogicalUnit}
7792 @param lu: the logical unit on whose behalf we execute
7793 @type instance: L{objects.Instance}
7794 @param instance: the instance whose disks we should create
7795 @return: the success of the wipe
7798 node = instance.primary_node
7800 for device in instance.disks:
7801 lu.cfg.SetDiskID(device, node)
7803 logging.info("Pause sync of instance %s disks", instance.name)
7804 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7806 for idx, success in enumerate(result.payload):
7808 logging.warn("pause-sync of instance %s for disks %d failed",
7812 for idx, device in enumerate(instance.disks):
7813 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk, but
7814 # at most MAX_WIPE_CHUNK
7815 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7816 constants.MIN_WIPE_CHUNK_PERCENT)
7817 # we _must_ make this an int, otherwise rounding errors will occur
7819 wipe_chunk_size = int(wipe_chunk_size)
7821 lu.LogInfo("* Wiping disk %d", idx)
7822 logging.info("Wiping disk %d for instance %s, node %s using"
7823 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7828 start_time = time.time()
7830 while offset < size:
7831 wipe_size = min(wipe_chunk_size, size - offset)
7832 logging.debug("Wiping disk %d, offset %s, chunk %s",
7833 idx, offset, wipe_size)
7834 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7835 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7836 (idx, offset, wipe_size))
7839 if now - last_output >= 60:
7840 eta = _CalcEta(now - start_time, offset, size)
7841 lu.LogInfo(" - done: %.1f%% ETA: %s" %
7842 (offset / float(size) * 100, utils.FormatSeconds(eta)))
7845 logging.info("Resume sync of instance %s disks", instance.name)
7847 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7849 for idx, success in enumerate(result.payload):
7851 lu.LogWarning("Resume sync of disk %d failed, please have a"
7852 " look at the status and troubleshoot the issue", idx)
7853 logging.warn("resume-sync of instance %s for disks %d failed",
7857 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7858 """Create all disks for an instance.
7860 This abstracts away some work from AddInstance.
7862 @type lu: L{LogicalUnit}
7863 @param lu: the logical unit on whose behalf we execute
7864 @type instance: L{objects.Instance}
7865 @param instance: the instance whose disks we should create
7867 @param to_skip: list of indices to skip
7868 @type target_node: string
7869 @param target_node: if passed, overrides the target node for creation
7871 @return: the success of the creation
7874 info = _GetInstanceInfoText(instance)
7875 if target_node is None:
7876 pnode = instance.primary_node
7877 all_nodes = instance.all_nodes
7882 if instance.disk_template in constants.DTS_FILEBASED:
7883 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7884 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7886 result.Raise("Failed to create directory '%s' on"
7887 " node %s" % (file_storage_dir, pnode))
7889 # Note: this needs to be kept in sync with adding of disks in
7890 # LUInstanceSetParams
7891 for idx, device in enumerate(instance.disks):
7892 if to_skip and idx in to_skip:
7894 logging.info("Creating volume %s for instance %s",
7895 device.iv_name, instance.name)
7897 for node in all_nodes:
7898 f_create = node == pnode
7899 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7902 def _RemoveDisks(lu, instance, target_node=None):
7903 """Remove all disks for an instance.
7905 This abstracts away some work from `AddInstance()` and
7906 `RemoveInstance()`. Note that in case some of the devices couldn't
7907 be removed, the removal will continue with the other ones (compare
7908 with `_CreateDisks()`).
7910 @type lu: L{LogicalUnit}
7911 @param lu: the logical unit on whose behalf we execute
7912 @type instance: L{objects.Instance}
7913 @param instance: the instance whose disks we should remove
7914 @type target_node: string
7915 @param target_node: used to override the node on which to remove the disks
7917 @return: the success of the removal
7920 logging.info("Removing block devices for instance %s", instance.name)
7923 for device in instance.disks:
7925 edata = [(target_node, device)]
7927 edata = device.ComputeNodeTree(instance.primary_node)
7928 for node, disk in edata:
7929 lu.cfg.SetDiskID(disk, node)
7930 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7932 lu.LogWarning("Could not remove block device %s on node %s,"
7933 " continuing anyway: %s", device.iv_name, node, msg)
7936 if instance.disk_template == constants.DT_FILE:
7937 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7941 tgt = instance.primary_node
7942 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7944 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7945 file_storage_dir, instance.primary_node, result.fail_msg)
7951 def _ComputeDiskSizePerVG(disk_template, disks):
7952 """Compute disk size requirements in the volume group
7955 def _compute(disks, payload):
7956 """Universal algorithm.
7961 vgs[disk[constants.IDISK_VG]] = \
7962 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
7966 # Required free disk space as a function of disk and swap space
7968 constants.DT_DISKLESS: {},
7969 constants.DT_PLAIN: _compute(disks, 0),
7970 # 128 MB are added for drbd metadata for each disk
7971 constants.DT_DRBD8: _compute(disks, 128),
7972 constants.DT_FILE: {},
7973 constants.DT_SHARED_FILE: {},
7976 if disk_template not in req_size_dict:
7977 raise errors.ProgrammerError("Disk template '%s' size requirement"
7978 " is unknown" % disk_template)
7980 return req_size_dict[disk_template]
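# Hedged example (hypothetical disks): with the per-VG computation above, two
# DRBD disks in different volume groups each carry 128 MB of metadata
# overhead, yielding {"xenvg": 1152, "fastvg": 2176}.
def _ExampleComputeDiskSizePerVG():
  """Illustrates L{_ComputeDiskSizePerVG} for the DRBD template."""
  disks = [
    {constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 1024},
    {constants.IDISK_VG: "fastvg", constants.IDISK_SIZE: 2048},
    ]
  return _ComputeDiskSizePerVG(constants.DT_DRBD8, disks)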
7983 def _ComputeDiskSize(disk_template, disks):
7984 """Compute disk size requirements in the volume group
7987 # Required free disk space as a function of disk and swap space
7989 constants.DT_DISKLESS: None,
7990 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7991 # 128 MB are added for drbd metadata for each disk
7992 constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7993 constants.DT_FILE: None,
7994 constants.DT_SHARED_FILE: 0,
7995 constants.DT_BLOCK: 0,
7998 if disk_template not in req_size_dict:
7999 raise errors.ProgrammerError("Disk template '%s' size requirement"
8000 " is unknown" % disk_template)
8002 return req_size_dict[disk_template]
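# For comparison with the per-VG variant, the function above returns a single
# figure; e.g. three hypothetical 1 GiB DRBD disks would need
#   _ComputeDiskSize(constants.DT_DRBD8, [{constants.IDISK_SIZE: 1024}] * 3)
# i.e. 3 * (1024 + 128) = 3456 MB.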
8005 def _FilterVmNodes(lu, nodenames):
8006 """Filters out non-vm_capable nodes from a list.
8008 @type lu: L{LogicalUnit}
8009 @param lu: the logical unit for which we check
8010 @type nodenames: list
8011 @param nodenames: the list of nodes on which we should check
8013 @return: the list of vm-capable nodes
8016 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8017 return [name for name in nodenames if name not in non_vm_nodes]
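# E.g. (hypothetical cluster): if node2 is marked vm_capable=False, then
# _FilterVmNodes(lu, ["node1", "node2", "node3"]) returns ["node1", "node3"].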
8020 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8021 """Hypervisor parameter validation.
8023 This function abstracts the hypervisor parameter validation to be
8024 used in both instance create and instance modify.
8026 @type lu: L{LogicalUnit}
8027 @param lu: the logical unit for which we check
8028 @type nodenames: list
8029 @param nodenames: the list of nodes on which we should check
8030 @type hvname: string
8031 @param hvname: the name of the hypervisor we should use
8032 @type hvparams: dict
8033 @param hvparams: the parameters which we need to check
8034 @raise errors.OpPrereqError: if the parameters are not valid
8037 nodenames = _FilterVmNodes(lu, nodenames)
8038 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8041 for node in nodenames:
8045 info.Raise("Hypervisor parameter validation failed on node %s" % node)
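# Hedged usage sketch (hypothetical values): validating KVM parameters on a
# single node before applying them could look like
#   _CheckHVParams(self, [pnode.name], constants.HT_KVM,
#                  {constants.HV_KERNEL_PATH: "/boot/vmlinuz"})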
8048 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8049 """OS parameters validation.
8051 @type lu: L{LogicalUnit}
8052 @param lu: the logical unit for which we check
8053 @type required: boolean
8054 @param required: whether the validation should fail if the OS is not found
8056 @type nodenames: list
8057 @param nodenames: the list of nodes on which we should check
8058 @type osname: string
8059 @param osname: the name of the OS we should use
8060 @type osparams: dict
8061 @param osparams: the parameters which we need to check
8062 @raise errors.OpPrereqError: if the parameters are not valid
8065 nodenames = _FilterVmNodes(lu, nodenames)
8066 result = lu.rpc.call_os_validate(required, nodenames, osname,
8067 [constants.OS_VALIDATE_PARAMETERS],
8069 for node, nres in result.items():
8070 # we don't check for offline cases since this should be run only
8071 # against the master node and/or an instance's nodes
8072 nres.Raise("OS Parameters validation failed on node %s" % node)
8073 if not nres.payload:
8074 lu.LogInfo("OS %s not found on node %s, validation skipped",
8078 class LUInstanceCreate(LogicalUnit):
8079 """Create an instance.
8082 HPATH = "instance-add"
8083 HTYPE = constants.HTYPE_INSTANCE
8086 def CheckArguments(self):
8090 # do not require name_check to ease forward/backward compatibility
8092 if self.op.no_install and self.op.start:
8093 self.LogInfo("No-installation mode selected, disabling startup")
8094 self.op.start = False
8095 # validate/normalize the instance name
8096 self.op.instance_name = \
8097 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8099 if self.op.ip_check and not self.op.name_check:
8100 # TODO: make the ip check more flexible and not depend on the name check
8101 raise errors.OpPrereqError("Cannot do IP address check without a name"
8102 " check", errors.ECODE_INVAL)
8104 # check nics' parameter names
8105 for nic in self.op.nics:
8106 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8108 # check disks' parameter names and consistent adopt/no-adopt strategy
8109 has_adopt = has_no_adopt = False
8110 for disk in self.op.disks:
8111 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8112 if constants.IDISK_ADOPT in disk:
8116 if has_adopt and has_no_adopt:
8117 raise errors.OpPrereqError("Either all disks are adopted or none is",
8120 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8121 raise errors.OpPrereqError("Disk adoption is not supported for the"
8122 " '%s' disk template" %
8123 self.op.disk_template,
8125 if self.op.iallocator is not None:
8126 raise errors.OpPrereqError("Disk adoption not allowed with an"
8127 " iallocator script", errors.ECODE_INVAL)
8128 if self.op.mode == constants.INSTANCE_IMPORT:
8129 raise errors.OpPrereqError("Disk adoption not allowed for"
8130 " instance import", errors.ECODE_INVAL)
8132 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8133 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8134 " but no 'adopt' parameter given" %
8135 self.op.disk_template,
8138 self.adopt_disks = has_adopt
8140 # instance name verification
8141 if self.op.name_check:
8142 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8143 self.op.instance_name = self.hostname1.name
8144 # used in CheckPrereq for ip ping check
8145 self.check_ip = self.hostname1.ip
8147 self.check_ip = None
8149 # file storage checks
8150 if (self.op.file_driver and
8151 not self.op.file_driver in constants.FILE_DRIVER):
8152 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8153 self.op.file_driver, errors.ECODE_INVAL)
8155 if self.op.disk_template == constants.DT_FILE:
8156 opcodes.RequireFileStorage()
8157 elif self.op.disk_template == constants.DT_SHARED_FILE:
8158 opcodes.RequireSharedFileStorage()
8160 ### Node/iallocator related checks
8161 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8163 if self.op.pnode is not None:
8164 if self.op.disk_template in constants.DTS_INT_MIRROR:
8165 if self.op.snode is None:
8166 raise errors.OpPrereqError("The networked disk templates need"
8167 " a mirror node", errors.ECODE_INVAL)
8169 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8171 self.op.snode = None
8173 self._cds = _GetClusterDomainSecret()
8175 if self.op.mode == constants.INSTANCE_IMPORT:
8176 # On import force_variant must be True, because if we forced it at
8177 # initial install, our only chance when importing it back is that it works again!
8179 self.op.force_variant = True
8181 if self.op.no_install:
8182 self.LogInfo("No-installation mode has no effect during import")
8184 elif self.op.mode == constants.INSTANCE_CREATE:
8185 if self.op.os_type is None:
8186 raise errors.OpPrereqError("No guest OS specified",
8188 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8189 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8190 " installation" % self.op.os_type,
8192 if self.op.disk_template is None:
8193 raise errors.OpPrereqError("No disk template specified",
8196 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8197 # Check handshake to ensure both clusters have the same domain secret
8198 src_handshake = self.op.source_handshake
8199 if not src_handshake:
8200 raise errors.OpPrereqError("Missing source handshake",
8203 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8206 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8209 # Load and check source CA
8210 self.source_x509_ca_pem = self.op.source_x509_ca
8211 if not self.source_x509_ca_pem:
8212 raise errors.OpPrereqError("Missing source X509 CA",
8216 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8218 except OpenSSL.crypto.Error, err:
8219 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8220 (err, ), errors.ECODE_INVAL)
8222 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8223 if errcode is not None:
8224 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8227 self.source_x509_ca = cert
8229 src_instance_name = self.op.source_instance_name
8230 if not src_instance_name:
8231 raise errors.OpPrereqError("Missing source instance name",
8234 self.source_instance_name = \
8235 netutils.GetHostname(name=src_instance_name).name
8238 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8239 self.op.mode, errors.ECODE_INVAL)
8241 def ExpandNames(self):
8242 """ExpandNames for CreateInstance.
8244 Figure out the right locks for instance creation.
8247 self.needed_locks = {}
8249 instance_name = self.op.instance_name
8250 # this is just a preventive check, but someone might still add this
8251 # instance in the meantime, and creation will fail at lock-add time
8252 if instance_name in self.cfg.GetInstanceList():
8253 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8254 instance_name, errors.ECODE_EXISTS)
8256 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8258 if self.op.iallocator:
8259 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8261 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8262 nodelist = [self.op.pnode]
8263 if self.op.snode is not None:
8264 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8265 nodelist.append(self.op.snode)
8266 self.needed_locks[locking.LEVEL_NODE] = nodelist
8268 # in case of import lock the source node too
8269 if self.op.mode == constants.INSTANCE_IMPORT:
8270 src_node = self.op.src_node
8271 src_path = self.op.src_path
8273 if src_path is None:
8274 self.op.src_path = src_path = self.op.instance_name
8276 if src_node is None:
8277 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8278 self.op.src_node = None
8279 if os.path.isabs(src_path):
8280 raise errors.OpPrereqError("Importing an instance from a path"
8281 " requires a source node option",
8284 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8285 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8286 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8287 if not os.path.isabs(src_path):
8288 self.op.src_path = src_path = \
8289 utils.PathJoin(constants.EXPORT_DIR, src_path)
8291 def _RunAllocator(self):
8292 """Run the allocator based on input opcode.
8295 nics = [n.ToDict() for n in self.nics]
8296 ial = IAllocator(self.cfg, self.rpc,
8297 mode=constants.IALLOCATOR_MODE_ALLOC,
8298 name=self.op.instance_name,
8299 disk_template=self.op.disk_template,
8302 vcpus=self.be_full[constants.BE_VCPUS],
8303 memory=self.be_full[constants.BE_MEMORY],
8306 hypervisor=self.op.hypervisor,
8309 ial.Run(self.op.iallocator)
8312 raise errors.OpPrereqError("Can't compute nodes using"
8313 " iallocator '%s': %s" %
8314 (self.op.iallocator, ial.info),
8316 if len(ial.result) != ial.required_nodes:
8317 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8318 " of nodes (%s), required %s" %
8319 (self.op.iallocator, len(ial.result),
8320 ial.required_nodes), errors.ECODE_FAULT)
8321 self.op.pnode = ial.result[0]
8322 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8323 self.op.instance_name, self.op.iallocator,
8324 utils.CommaJoin(ial.result))
8325 if ial.required_nodes == 2:
8326 self.op.snode = ial.result[1]
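# Note: for mirrored disk templates the allocator is asked for two nodes;
# the first result becomes the primary and the second the DRBD secondary,
# which is why required_nodes == 2 is the only extra case handled above.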
8328 def BuildHooksEnv(self):
8331 This runs on master, primary and secondary nodes of the instance.
8335 "ADD_MODE": self.op.mode,
8337 if self.op.mode == constants.INSTANCE_IMPORT:
8338 env["SRC_NODE"] = self.op.src_node
8339 env["SRC_PATH"] = self.op.src_path
8340 env["SRC_IMAGES"] = self.src_images
8342 env.update(_BuildInstanceHookEnv(
8343 name=self.op.instance_name,
8344 primary_node=self.op.pnode,
8345 secondary_nodes=self.secondaries,
8346 status=self.op.start,
8347 os_type=self.op.os_type,
8348 memory=self.be_full[constants.BE_MEMORY],
8349 vcpus=self.be_full[constants.BE_VCPUS],
8350 nics=_NICListToTuple(self, self.nics),
8351 disk_template=self.op.disk_template,
8352 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8353 for d in self.disks],
8356 hypervisor_name=self.op.hypervisor,
8362 def BuildHooksNodes(self):
8363 """Build hooks nodes.
8366 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8369 def _ReadExportInfo(self):
8370 """Reads the export information from disk.
8372 It will override the opcode source node and path with the actual
8373 information, if these two were not specified before.
8375 @return: the export information
8378 assert self.op.mode == constants.INSTANCE_IMPORT
8380 src_node = self.op.src_node
8381 src_path = self.op.src_path
8383 if src_node is None:
8384 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8385 exp_list = self.rpc.call_export_list(locked_nodes)
8387 for node in exp_list:
8388 if exp_list[node].fail_msg:
8390 if src_path in exp_list[node].payload:
8392 self.op.src_node = src_node = node
8393 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8397 raise errors.OpPrereqError("No export found for relative path %s" %
8398 src_path, errors.ECODE_INVAL)
8400 _CheckNodeOnline(self, src_node)
8401 result = self.rpc.call_export_info(src_node, src_path)
8402 result.Raise("No export or invalid export found in dir %s" % src_path)
8404 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8405 if not export_info.has_section(constants.INISECT_EXP):
8406 raise errors.ProgrammerError("Corrupted export config",
8407 errors.ECODE_ENVIRON)
8409 ei_version = export_info.get(constants.INISECT_EXP, "version")
8410 if (int(ei_version) != constants.EXPORT_VERSION):
8411 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8412 (ei_version, constants.EXPORT_VERSION),
8413 errors.ECODE_ENVIRON)
8416 def _ReadExportParams(self, einfo):
8417 """Use export parameters as defaults.
8419 Where the opcode doesn't specify (i.e. override) some instance
8420 parameters, try to take them from the export information, if available.
8424 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8426 if self.op.disk_template is None:
8427 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8428 self.op.disk_template = einfo.get(constants.INISECT_INS,
8431 raise errors.OpPrereqError("No disk template specified and the export"
8432 " is missing the disk_template information",
8435 if not self.op.disks:
8436 if einfo.has_option(constants.INISECT_INS, "disk_count"):
8438 # TODO: import the disk iv_name too
8439 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8440 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8441 disks.append({constants.IDISK_SIZE: disk_sz})
8442 self.op.disks = disks
8444 raise errors.OpPrereqError("No disk info specified and the export"
8445 " is missing the disk information",
8448 if (not self.op.nics and
8449 einfo.has_option(constants.INISECT_INS, "nic_count")):
8451 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8453 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8454 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8459 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8460 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8462 if (self.op.hypervisor is None and
8463 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8464 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8466 if einfo.has_section(constants.INISECT_HYP):
8467 # use the export parameters but do not override the ones
8468 # specified by the user
8469 for name, value in einfo.items(constants.INISECT_HYP):
8470 if name not in self.op.hvparams:
8471 self.op.hvparams[name] = value
8473 if einfo.has_section(constants.INISECT_BEP):
8474 # use the parameters, without overriding
8475 for name, value in einfo.items(constants.INISECT_BEP):
8476 if name not in self.op.beparams:
8477 self.op.beparams[name] = value
8479 # try to read the parameters old style, from the main section
8480 for name in constants.BES_PARAMETERS:
8481 if (name not in self.op.beparams and
8482 einfo.has_option(constants.INISECT_INS, name)):
8483 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8485 if einfo.has_section(constants.INISECT_OSP):
8486 # use the parameters, without overriding
8487 for name, value in einfo.items(constants.INISECT_OSP):
8488 if name not in self.op.osparams:
8489 self.op.osparams[name] = value
8491 def _RevertToDefaults(self, cluster):
8492 """Revert the instance parameters to the default values.
8496 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8497 for name in self.op.hvparams.keys():
8498 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8499 del self.op.hvparams[name]
8501 be_defs = cluster.SimpleFillBE({})
8502 for name in self.op.beparams.keys():
8503 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8504 del self.op.beparams[name]
8506 nic_defs = cluster.SimpleFillNIC({})
8507 for nic in self.op.nics:
8508 for name in constants.NICS_PARAMETERS:
8509 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8512 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8513 for name in self.op.osparams.keys():
8514 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8515 del self.op.osparams[name]
8517 def _CalculateFileStorageDir(self):
8518 """Calculate final instance file storage dir.
8521 # file storage dir calculation/check
8522 self.instance_file_storage_dir = None
8523 if self.op.disk_template in constants.DTS_FILEBASED:
8524 # build the full file storage dir path
8527 if self.op.disk_template == constants.DT_SHARED_FILE:
8528 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8530 get_fsd_fn = self.cfg.GetFileStorageDir
8532 cfg_storagedir = get_fsd_fn()
8533 if not cfg_storagedir:
8534 raise errors.OpPrereqError("Cluster file storage dir not defined")
8535 joinargs.append(cfg_storagedir)
8537 if self.op.file_storage_dir is not None:
8538 joinargs.append(self.op.file_storage_dir)
8540 joinargs.append(self.op.instance_name)
8542 # pylint: disable=W0142
8543 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
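# Illustrative result (hypothetical values): with a cluster storage dir of
# "/srv/ganeti/file-storage", an opcode file_storage_dir of "web" and an
# instance named "inst1.example.com", the final directory would be
# "/srv/ganeti/file-storage/web/inst1.example.com".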
8545 def CheckPrereq(self):
8546 """Check prerequisites.
8549 self._CalculateFileStorageDir()
8551 if self.op.mode == constants.INSTANCE_IMPORT:
8552 export_info = self._ReadExportInfo()
8553 self._ReadExportParams(export_info)
8555 if (not self.cfg.GetVGName() and
8556 self.op.disk_template not in constants.DTS_NOT_LVM):
8557 raise errors.OpPrereqError("Cluster does not support lvm-based"
8558 " instances", errors.ECODE_STATE)
8560 if self.op.hypervisor is None:
8561 self.op.hypervisor = self.cfg.GetHypervisorType()
8563 cluster = self.cfg.GetClusterInfo()
8564 enabled_hvs = cluster.enabled_hypervisors
8565 if self.op.hypervisor not in enabled_hvs:
8566 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8567 " cluster (%s)" % (self.op.hypervisor,
8568 ",".join(enabled_hvs)),
8571 # Check tag validity
8572 for tag in self.op.tags:
8573 objects.TaggableObject.ValidateTag(tag)
8575 # check hypervisor parameter syntax (locally)
8576 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8577 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8579 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8580 hv_type.CheckParameterSyntax(filled_hvp)
8581 self.hv_full = filled_hvp
8582 # check that we don't specify global parameters on an instance
8583 _CheckGlobalHvParams(self.op.hvparams)
8585 # fill and remember the beparams dict
8586 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8587 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8589 # build os parameters
8590 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8592 # now that hvp/bep are in final format, let's reset to defaults, if told to do so
8594 if self.op.identify_defaults:
8595 self._RevertToDefaults(cluster)
8599 for idx, nic in enumerate(self.op.nics):
8600 nic_mode_req = nic.get(constants.INIC_MODE, None)
8601 nic_mode = nic_mode_req
8602 if nic_mode is None:
8603 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8605 # in routed mode, for the first nic, the default ip is 'auto'
8606 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8607 default_ip_mode = constants.VALUE_AUTO
8609 default_ip_mode = constants.VALUE_NONE
8611 # ip validity checks
8612 ip = nic.get(constants.INIC_IP, default_ip_mode)
8613 if ip is None or ip.lower() == constants.VALUE_NONE:
8615 elif ip.lower() == constants.VALUE_AUTO:
8616 if not self.op.name_check:
8617 raise errors.OpPrereqError("IP address set to auto but name checks"
8618 " have been skipped",
8620 nic_ip = self.hostname1.ip
8622 if not netutils.IPAddress.IsValid(ip):
8623 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8627 # TODO: check the ip address for uniqueness
8628 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8629 raise errors.OpPrereqError("Routed nic mode requires an ip address",
8632 # MAC address verification
8633 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8634 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8635 mac = utils.NormalizeAndValidateMac(mac)
8638 self.cfg.ReserveMAC(mac, self.proc.GetECId())
8639 except errors.ReservationError:
8640 raise errors.OpPrereqError("MAC address %s already in use"
8641 " in cluster" % mac,
8642 errors.ECODE_NOTUNIQUE)
8644 # Build nic parameters
8645 link = nic.get(constants.INIC_LINK, None)
8648 nicparams[constants.NIC_MODE] = nic_mode_req
8650 nicparams[constants.NIC_LINK] = link
8652 check_params = cluster.SimpleFillNIC(nicparams)
8653 objects.NIC.CheckParameterSyntax(check_params)
8654 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8656 # disk checks/pre-build
8657 default_vg = self.cfg.GetVGName()
8659 for disk in self.op.disks:
8660 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8661 if mode not in constants.DISK_ACCESS_SET:
8662 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8663 mode, errors.ECODE_INVAL)
8664 size = disk.get(constants.IDISK_SIZE, None)
8666 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8669 except (TypeError, ValueError):
8670 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8673 data_vg = disk.get(constants.IDISK_VG, default_vg)
8675 constants.IDISK_SIZE: size,
8676 constants.IDISK_MODE: mode,
8677 constants.IDISK_VG: data_vg,
8678 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8680 if constants.IDISK_ADOPT in disk:
8681 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8682 self.disks.append(new_disk)
8684 if self.op.mode == constants.INSTANCE_IMPORT:
8686 # Check that the new instance doesn't have less disks than the export
8687 instance_disks = len(self.disks)
8688 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8689 if instance_disks < export_disks:
8690 raise errors.OpPrereqError("Not enough disks to import."
8691 " (instance: %d, export: %d)" %
8692 (instance_disks, export_disks),
8696 for idx in range(export_disks):
8697 option = "disk%d_dump" % idx
8698 if export_info.has_option(constants.INISECT_INS, option):
8699 # FIXME: are the old os-es, disk sizes, etc. useful?
8700 export_name = export_info.get(constants.INISECT_INS, option)
8701 image = utils.PathJoin(self.op.src_path, export_name)
8702 disk_images.append(image)
8704 disk_images.append(False)
8706 self.src_images = disk_images
8708 old_name = export_info.get(constants.INISECT_INS, "name")
8710 exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8711 except (TypeError, ValueError), err:
8712 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8713 " an integer: %s" % str(err),
8715 if self.op.instance_name == old_name:
8716 for idx, nic in enumerate(self.nics):
8717 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8718 nic_mac_ini = "nic%d_mac" % idx
8719 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8721 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8723 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8724 if self.op.ip_check:
8725 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8726 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8727 (self.check_ip, self.op.instance_name),
8728 errors.ECODE_NOTUNIQUE)
8730 #### mac address generation
8731 # By generating the MAC address here, both the allocator and the hooks get
8732 # the real final MAC address rather than the 'auto' or 'generate' value.
8733 # There is a race condition between the generation and the instance object
8734 # creation, which means that we know the MAC is valid now, but we're not
8735 # sure it will be when we actually add the instance. If things go bad,
8736 # adding the instance will abort because of a duplicate MAC, and the
8737 # creation job will fail.
8738 for nic in self.nics:
8739 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8740 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8744 if self.op.iallocator is not None:
8745 self._RunAllocator()
8747 #### node related checks
8749 # check primary node
8750 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8751 assert self.pnode is not None, \
8752 "Cannot retrieve locked node %s" % self.op.pnode
8754 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8755 pnode.name, errors.ECODE_STATE)
8757 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8758 pnode.name, errors.ECODE_STATE)
8759 if not pnode.vm_capable:
8760 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8761 " '%s'" % pnode.name, errors.ECODE_STATE)
8763 self.secondaries = []
8765 # mirror node verification
8766 if self.op.disk_template in constants.DTS_INT_MIRROR:
8767 if self.op.snode == pnode.name:
8768 raise errors.OpPrereqError("The secondary node cannot be the"
8769 " primary node", errors.ECODE_INVAL)
8770 _CheckNodeOnline(self, self.op.snode)
8771 _CheckNodeNotDrained(self, self.op.snode)
8772 _CheckNodeVmCapable(self, self.op.snode)
8773 self.secondaries.append(self.op.snode)
8775 nodenames = [pnode.name] + self.secondaries
8777 if not self.adopt_disks:
8778 # Check lv size requirements, if not adopting
8779 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8780 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8782 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8783 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8784 disk[constants.IDISK_ADOPT])
8785 for disk in self.disks])
8786 if len(all_lvs) != len(self.disks):
8787 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8789 for lv_name in all_lvs:
8791 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
8792 # to ReserveLV use the same syntax
8793 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8794 except errors.ReservationError:
8795 raise errors.OpPrereqError("LV named %s used by another instance" %
8796 lv_name, errors.ECODE_NOTUNIQUE)
8798 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8799 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8801 node_lvs = self.rpc.call_lv_list([pnode.name],
8802 vg_names.payload.keys())[pnode.name]
8803 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8804 node_lvs = node_lvs.payload
8806 delta = all_lvs.difference(node_lvs.keys())
8808 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8809 utils.CommaJoin(delta),
8811 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8813 raise errors.OpPrereqError("Online logical volumes found, cannot"
8814 " adopt: %s" % utils.CommaJoin(online_lvs),
8816 # update the size of disk based on what is found
8817 for dsk in self.disks:
8818 dsk[constants.IDISK_SIZE] = \
8819 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8820 dsk[constants.IDISK_ADOPT])][0]))
8822 elif self.op.disk_template == constants.DT_BLOCK:
8823 # Normalize and de-duplicate device paths
8824 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8825 for disk in self.disks])
8826 if len(all_disks) != len(self.disks):
8827 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8829 baddisks = [d for d in all_disks
8830 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8832 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8833 " cannot be adopted" %
8834 (", ".join(baddisks),
8835 constants.ADOPTABLE_BLOCKDEV_ROOT),
8838 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8839 list(all_disks))[pnode.name]
8840 node_disks.Raise("Cannot get block device information from node %s" %
8842 node_disks = node_disks.payload
8843 delta = all_disks.difference(node_disks.keys())
8845 raise errors.OpPrereqError("Missing block device(s): %s" %
8846 utils.CommaJoin(delta),
8848 for dsk in self.disks:
8849 dsk[constants.IDISK_SIZE] = \
8850 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8852 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8854 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8855 # check OS parameters (remotely)
8856 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8858 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8860 # memory check on primary node
8862 _CheckNodeFreeMemory(self, self.pnode.name,
8863 "creating instance %s" % self.op.instance_name,
8864 self.be_full[constants.BE_MEMORY],
8867 self.dry_run_result = list(nodenames)
8869 def Exec(self, feedback_fn):
8870 """Create and add the instance to the cluster.
8873 instance = self.op.instance_name
8874 pnode_name = self.pnode.name
8876 ht_kind = self.op.hypervisor
8877 if ht_kind in constants.HTS_REQ_PORT:
8878 network_port = self.cfg.AllocatePort()
8882 disks = _GenerateDiskTemplate(self,
8883 self.op.disk_template,
8884 instance, pnode_name,
8887 self.instance_file_storage_dir,
8888 self.op.file_driver,
8892 iobj = objects.Instance(name=instance, os=self.op.os_type,
8893 primary_node=pnode_name,
8894 nics=self.nics, disks=disks,
8895 disk_template=self.op.disk_template,
8897 network_port=network_port,
8898 beparams=self.op.beparams,
8899 hvparams=self.op.hvparams,
8900 hypervisor=self.op.hypervisor,
8901 osparams=self.op.osparams,
8905 for tag in self.op.tags:
8908 if self.adopt_disks:
8909 if self.op.disk_template == constants.DT_PLAIN:
8910 # rename LVs to the newly-generated names; we need to construct
8911 # 'fake' LV disks with the old data, plus the new unique_id
8912 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8914 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8915 rename_to.append(t_dsk.logical_id)
8916 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8917 self.cfg.SetDiskID(t_dsk, pnode_name)
8918 result = self.rpc.call_blockdev_rename(pnode_name,
8919 zip(tmp_disks, rename_to))
8920 result.Raise("Failed to rename adoped LVs")
8922 feedback_fn("* creating instance disks...")
8924 _CreateDisks(self, iobj)
8925 except errors.OpExecError:
8926 self.LogWarning("Device creation failed, reverting...")
8928 _RemoveDisks(self, iobj)
8930 self.cfg.ReleaseDRBDMinors(instance)
8933 feedback_fn("adding instance %s to cluster config" % instance)
8935 self.cfg.AddInstance(iobj, self.proc.GetECId())
8937 # Declare that we don't want to remove the instance lock anymore, as we've
8938 # added the instance to the config
8939 del self.remove_locks[locking.LEVEL_INSTANCE]
8941 if self.op.mode == constants.INSTANCE_IMPORT:
8942 # Release unused nodes
8943 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8946 _ReleaseLocks(self, locking.LEVEL_NODE)
8949 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8950 feedback_fn("* wiping instance disks...")
8952 _WipeDisks(self, iobj)
8953 except errors.OpExecError, err:
8954 logging.exception("Wiping disks failed")
8955 self.LogWarning("Wiping instance disks failed (%s)", err)
8959 # Something is already wrong with the disks, don't do anything else
8961 elif self.op.wait_for_sync:
8962 disk_abort = not _WaitForSync(self, iobj)
8963 elif iobj.disk_template in constants.DTS_INT_MIRROR:
8964 # make sure the disks are not degraded (still sync-ing is ok)
8965 feedback_fn("* checking mirrors status")
8966 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8971 _RemoveDisks(self, iobj)
8972 self.cfg.RemoveInstance(iobj.name)
8973 # Make sure the instance lock gets removed
8974 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8975 raise errors.OpExecError("There are some degraded disks for"
8978 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8979 if self.op.mode == constants.INSTANCE_CREATE:
8980 if not self.op.no_install:
8981 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
8982 not self.op.wait_for_sync)
8984 feedback_fn("* pausing disk sync to install instance OS")
8985 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
8987 for idx, success in enumerate(result.payload):
8989 logging.warn("pause-sync of instance %s for disk %d failed",
8992 feedback_fn("* running the instance OS create scripts...")
8993 # FIXME: pass debug option from opcode to backend
8994 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8995 self.op.debug_level)
8997 feedback_fn("* resuming disk sync")
8998 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9000 for idx, success in enumerate(result.payload):
9002 logging.warn("resume-sync of instance %s for disk %d failed",
9005 result.Raise("Could not add os for instance %s"
9006 " on node %s" % (instance, pnode_name))
9008 elif self.op.mode == constants.INSTANCE_IMPORT:
9009 feedback_fn("* running the instance OS import scripts...")
9013 for idx, image in enumerate(self.src_images):
9017 # FIXME: pass debug option from opcode to backend
9018 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9019 constants.IEIO_FILE, (image, ),
9020 constants.IEIO_SCRIPT,
9021 (iobj.disks[idx], idx),
9023 transfers.append(dt)
9026 masterd.instance.TransferInstanceData(self, feedback_fn,
9027 self.op.src_node, pnode_name,
9028 self.pnode.secondary_ip,
9030 if not compat.all(import_result):
9031 self.LogWarning("Some disks for instance %s on node %s were not"
9032 " imported successfully" % (instance, pnode_name))
9034 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9035 feedback_fn("* preparing remote import...")
9036 # The source cluster will stop the instance before attempting to make a
9037 # connection. In some cases stopping an instance can take a long time,
9038 # hence the shutdown timeout is added to the connection timeout.
9039 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9040 self.op.source_shutdown_timeout)
9041 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9043 assert iobj.primary_node == self.pnode.name
9045 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9046 self.source_x509_ca,
9047 self._cds, timeouts)
9048 if not compat.all(disk_results):
9049 # TODO: Should the instance still be started, even if some disks
9050 # failed to import (valid for local imports, too)?
9051 self.LogWarning("Some disks for instance %s on node %s were not"
9052 " imported successfully" % (instance, pnode_name))
9054 # Run rename script on newly imported instance
9055 assert iobj.name == instance
9056 feedback_fn("Running rename script for %s" % instance)
9057 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9058 self.source_instance_name,
9059 self.op.debug_level)
9061 self.LogWarning("Failed to run rename script for %s on node"
9062 " %s: %s" % (instance, pnode_name, result.fail_msg))
9065 # also checked in the prereq part
9066 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9070 iobj.admin_up = True
9071 self.cfg.Update(iobj, feedback_fn)
9072 logging.info("Starting instance %s on node %s", instance, pnode_name)
9073 feedback_fn("* starting instance...")
9074 result = self.rpc.call_instance_start(pnode_name, iobj,
9076 result.Raise("Could not start instance")
9078 return list(iobj.all_nodes)
9081 class LUInstanceConsole(NoHooksLU):
9082 """Connect to an instance's console.
9084 This is somewhat special in that it returns the command line that
9085 you need to run on the master node in order to connect to the console.
9091 def ExpandNames(self):
9092 self._ExpandAndLockInstance()
9094 def CheckPrereq(self):
9095 """Check prerequisites.
9097 This checks that the instance is in the cluster.
9100 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9101 assert self.instance is not None, \
9102 "Cannot retrieve locked instance %s" % self.op.instance_name
9103 _CheckNodeOnline(self, self.instance.primary_node)
9105 def Exec(self, feedback_fn):
9106 """Connect to the console of an instance
9109 instance = self.instance
9110 node = instance.primary_node
9112 node_insts = self.rpc.call_instance_list([node],
9113 [instance.hypervisor])[node]
9114 node_insts.Raise("Can't get node information from %s" % node)
9116 if instance.name not in node_insts.payload:
9117 if instance.admin_up:
9118 state = constants.INSTST_ERRORDOWN
9120 state = constants.INSTST_ADMINDOWN
9121 raise errors.OpExecError("Instance %s is not running (state %s)" %
9122 (instance.name, state))
9124 logging.debug("Connecting to console of %s on %s", instance.name, node)
9126 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9129 def _GetInstanceConsole(cluster, instance):
9130 """Returns console information for an instance.
9132 @type cluster: L{objects.Cluster}
9133 @type instance: L{objects.Instance}
9137 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9138 # beparams and hvparams are passed separately, to avoid editing the
9139 # instance and then saving the defaults in the instance itself.
9140 hvparams = cluster.FillHV(instance)
9141 beparams = cluster.FillBE(instance)
9142 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9144 assert console.instance == instance.name
9145 assert console.Validate()
9147 return console.ToDict()
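# The returned dictionary is a serialized L{objects.InstanceConsole}; the
# client uses it on the master to build the actual console command. Which
# fields are meaningful depends on the console kind (an SSH command, a VNC
# host/port pair, or a plain message).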
9150 class LUInstanceReplaceDisks(LogicalUnit):
9151 """Replace the disks of an instance.
9154 HPATH = "mirrors-replace"
9155 HTYPE = constants.HTYPE_INSTANCE
9158 def CheckArguments(self):
9159 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9162 def ExpandNames(self):
9163 self._ExpandAndLockInstance()
9165 assert locking.LEVEL_NODE not in self.needed_locks
9166 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9168 assert self.op.iallocator is None or self.op.remote_node is None, \
9169 "Conflicting options"
9171 if self.op.remote_node is not None:
9172 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9174 # Warning: do not remove the locking of the new secondary here
9175 # unless DRBD8.AddChildren is changed to work in parallel;
9176 # currently it doesn't since parallel invocations of
9177 # FindUnusedMinor will conflict
9178 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9179 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9181 self.needed_locks[locking.LEVEL_NODE] = []
9182 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9184 if self.op.iallocator is not None:
9185 # iallocator will select a new node in the same group
9186 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9188 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9189 self.op.iallocator, self.op.remote_node,
9190 self.op.disks, False, self.op.early_release)
9192 self.tasklets = [self.replacer]
9194 def DeclareLocks(self, level):
9195 if level == locking.LEVEL_NODEGROUP:
9196 assert self.op.remote_node is None
9197 assert self.op.iallocator is not None
9198 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9200 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9201 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9202 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9204 elif level == locking.LEVEL_NODE:
9205 if self.op.iallocator is not None:
9206 assert self.op.remote_node is None
9207 assert not self.needed_locks[locking.LEVEL_NODE]
9209 # Lock member nodes of all locked groups
9210 self.needed_locks[locking.LEVEL_NODE] = [node_name
9211 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9212 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9214 self._LockInstancesNodes()
9216 def BuildHooksEnv(self):
9219 This runs on the master, the primary and all the secondaries.
9222 instance = self.replacer.instance
9224 "MODE": self.op.mode,
9225 "NEW_SECONDARY": self.op.remote_node,
9226 "OLD_SECONDARY": instance.secondary_nodes[0],
9228 env.update(_BuildInstanceHookEnvByObject(self, instance))
9231 def BuildHooksNodes(self):
9232 """Build hooks nodes.
9235 instance = self.replacer.instance
9237 self.cfg.GetMasterNode(),
9238 instance.primary_node,
9240 if self.op.remote_node is not None:
9241 nl.append(self.op.remote_node)
9244 def CheckPrereq(self):
9245 """Check prerequisites.
9248 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9249 self.op.iallocator is None)
9251 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9253 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9255 return LogicalUnit.CheckPrereq(self)
9258 class TLReplaceDisks(Tasklet):
9259 """Replaces disks for an instance.
9261 Note: Locking is not within the scope of this class.
9264 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9265 disks, delay_iallocator, early_release):
9266 """Initializes this class.
9269 Tasklet.__init__(self, lu)
9272 self.instance_name = instance_name
9274 self.iallocator_name = iallocator_name
9275 self.remote_node = remote_node
9277 self.delay_iallocator = delay_iallocator
9278 self.early_release = early_release
9281 self.instance = None
9282 self.new_node = None
9283 self.target_node = None
9284 self.other_node = None
9285 self.remote_node_info = None
9286 self.node_secondary_ip = None
9289 def CheckArguments(mode, remote_node, iallocator):
9290 """Helper function for users of this class.
9293 # check for valid parameter combination
9294 if mode == constants.REPLACE_DISK_CHG:
9295 if remote_node is None and iallocator is None:
9296 raise errors.OpPrereqError("When changing the secondary either an"
9297 " iallocator script must be used or the"
9298 " new node given", errors.ECODE_INVAL)
9300 if remote_node is not None and iallocator is not None:
9301 raise errors.OpPrereqError("Give either the iallocator or the new"
9302 " secondary, not both", errors.ECODE_INVAL)
9304 elif remote_node is not None or iallocator is not None:
9305 # Not replacing the secondary
9306 raise errors.OpPrereqError("The iallocator and new node options can"
9307 " only be used when changing the"
9308 " secondary node", errors.ECODE_INVAL)
9311 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9312 """Compute a new secondary node using an IAllocator.
9315 ial = IAllocator(lu.cfg, lu.rpc,
9316 mode=constants.IALLOCATOR_MODE_RELOC,
9318 relocate_from=list(relocate_from))
9320 ial.Run(iallocator_name)
9323 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9324 " %s" % (iallocator_name, ial.info),
9327 if len(ial.result) != ial.required_nodes:
9328 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9329 " of nodes (%s), required %s" %
9331 len(ial.result), ial.required_nodes),
9334 remote_node_name = ial.result[0]
9336 lu.LogInfo("Selected new secondary for instance '%s': %s",
9337 instance_name, remote_node_name)
9339 return remote_node_name
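# In IALLOCATOR_MODE_RELOC the allocator is asked for exactly one replacement
# node for the instance's secondary, so ial.result holds a single name here.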
9341 def _FindFaultyDisks(self, node_name):
9342 """Wrapper for L{_FindFaultyInstanceDisks}.
9345 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9348 def _CheckDisksActivated(self, instance):
9349 """Checks if the instance disks are activated.
9351 @param instance: The instance to check disks
9352 @return: True if they are activated, False otherwise
9355 nodes = instance.all_nodes
9357 for idx, dev in enumerate(instance.disks):
9359 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9360 self.cfg.SetDiskID(dev, node)
9362 result = self.rpc.call_blockdev_find(node, dev)
9366 elif result.fail_msg or not result.payload:
9371 def CheckPrereq(self):
9372 """Check prerequisites.
9374 This checks that the instance is in the cluster.
9377 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9378 assert instance is not None, \
9379 "Cannot retrieve locked instance %s" % self.instance_name
9381 if instance.disk_template != constants.DT_DRBD8:
9382 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9383 " instances", errors.ECODE_INVAL)
9385 if len(instance.secondary_nodes) != 1:
9386 raise errors.OpPrereqError("The instance has a strange layout,"
9387 " expected one secondary but found %d" %
9388 len(instance.secondary_nodes),
9391 if not self.delay_iallocator:
9392 self._CheckPrereq2()
9394 def _CheckPrereq2(self):
9395 """Check prerequisites, second part.
9397 This function should always be part of CheckPrereq. It was separated and is
9398 now called from Exec because during node evacuation iallocator was only
9399 called with an unmodified cluster model, not taking planned changes into account.
9403 instance = self.instance
9404 secondary_node = instance.secondary_nodes[0]
9406 if self.iallocator_name is None:
9407 remote_node = self.remote_node
9409 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9410 instance.name, instance.secondary_nodes)
9412 if remote_node is None:
9413 self.remote_node_info = None
9415 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9416 "Remote node '%s' is not locked" % remote_node
9418 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9419 assert self.remote_node_info is not None, \
9420 "Cannot retrieve locked node %s" % remote_node
9422 if remote_node == self.instance.primary_node:
9423 raise errors.OpPrereqError("The specified node is the primary node of"
9424 " the instance", errors.ECODE_INVAL)
9426 if remote_node == secondary_node:
9427 raise errors.OpPrereqError("The specified node is already the"
9428 " secondary node of the instance",
9431 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9432 constants.REPLACE_DISK_CHG):
9433 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9436 if self.mode == constants.REPLACE_DISK_AUTO:
9437 if not self._CheckDisksActivated(instance):
9438 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9439 " first" % self.instance_name,
9441 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9442 faulty_secondary = self._FindFaultyDisks(secondary_node)
9444 if faulty_primary and faulty_secondary:
9445 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9446 " one node and can not be repaired"
9447 " automatically" % self.instance_name,
9451 self.disks = faulty_primary
9452 self.target_node = instance.primary_node
9453 self.other_node = secondary_node
9454 check_nodes = [self.target_node, self.other_node]
9455 elif faulty_secondary:
9456 self.disks = faulty_secondary
9457 self.target_node = secondary_node
9458 self.other_node = instance.primary_node
9459 check_nodes = [self.target_node, self.other_node]
9465 # Non-automatic modes
9466 if self.mode == constants.REPLACE_DISK_PRI:
9467 self.target_node = instance.primary_node
9468 self.other_node = secondary_node
9469 check_nodes = [self.target_node, self.other_node]
9471 elif self.mode == constants.REPLACE_DISK_SEC:
9472 self.target_node = secondary_node
9473 self.other_node = instance.primary_node
9474 check_nodes = [self.target_node, self.other_node]
9476 elif self.mode == constants.REPLACE_DISK_CHG:
9477 self.new_node = remote_node
9478 self.other_node = instance.primary_node
9479 self.target_node = secondary_node
9480 check_nodes = [self.new_node, self.other_node]
9482 _CheckNodeNotDrained(self.lu, remote_node)
9483 _CheckNodeVmCapable(self.lu, remote_node)
9485 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9486 assert old_node_info is not None
9487 if old_node_info.offline and not self.early_release:
9488 # doesn't make sense to delay the release
9489 self.early_release = True
9490 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9491 " early-release mode", secondary_node)
9494 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9497 # If not specified all disks should be replaced
9499 self.disks = range(len(self.instance.disks))
9501 for node in check_nodes:
9502 _CheckNodeOnline(self.lu, node)
9504 touched_nodes = frozenset(node_name for node_name in [self.new_node,
9507 if node_name is not None)
9509 # Release unneeded node locks
9510 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9512 # Release any owned node group
9513 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9514 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9516 # Check whether disks are valid
9517 for disk_idx in self.disks:
9518 instance.FindDisk(disk_idx)
9520 # Get secondary node IP addresses
9521 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9522 in self.cfg.GetMultiNodeInfo(touched_nodes))
9524 def Exec(self, feedback_fn):
9525 """Execute disk replacement.
9527 This dispatches the disk replacement to the appropriate handler.
9530 if self.delay_iallocator:
9531 self._CheckPrereq2()
9534 # Verify owned locks before starting operation
9535 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9536 assert set(owned_nodes) == set(self.node_secondary_ip), \
9537 ("Incorrect node locks, owning %s, expected %s" %
9538 (owned_nodes, self.node_secondary_ip.keys()))
9540 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9541 assert list(owned_instances) == [self.instance_name], \
9542 "Instance '%s' not locked" % self.instance_name
9544 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9545 "Should not own any node group lock at this point"
9548 feedback_fn("No disks need replacement")
9551 feedback_fn("Replacing disk(s) %s for %s" %
9552 (utils.CommaJoin(self.disks), self.instance.name))
9554 activate_disks = (not self.instance.admin_up)
9556 # Activate the instance disks if we're replacing them on a down instance
9558 _StartInstanceDisks(self.lu, self.instance, True)
9561 # Should we replace the secondary node?
9562 if self.new_node is not None:
9563 fn = self._ExecDrbd8Secondary
9565 fn = self._ExecDrbd8DiskOnly
9567 result = fn(feedback_fn)
9569 # Deactivate the instance disks if we're replacing them on a
9572 _SafeShutdownInstanceDisks(self.lu, self.instance)
9575 # Verify owned locks
9576 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9577 nodes = frozenset(self.node_secondary_ip)
9578 assert ((self.early_release and not owned_nodes) or
9579 (not self.early_release and not (set(owned_nodes) - nodes))), \
9580 ("Not owning the correct locks, early_release=%s, owned=%r,"
9581 " nodes=%r" % (self.early_release, owned_nodes, nodes))
9585 def _CheckVolumeGroup(self, nodes):
9586 self.lu.LogInfo("Checking volume groups")
9588 vgname = self.cfg.GetVGName()
9590 # Make sure volume group exists on all involved nodes
9591 results = self.rpc.call_vg_list(nodes)
9593 raise errors.OpExecError("Can't list volume groups on the nodes")
9597 res.Raise("Error checking node %s" % node)
9598 if vgname not in res.payload:
9599 raise errors.OpExecError("Volume group '%s' not found on node %s" %
9602 def _CheckDisksExistence(self, nodes):
9603 # Check disk existence
9604 for idx, dev in enumerate(self.instance.disks):
9605 if idx not in self.disks:
9609 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9610 self.cfg.SetDiskID(dev, node)
9612 result = self.rpc.call_blockdev_find(node, dev)
9614 msg = result.fail_msg
9615 if msg or not result.payload:
9617 msg = "disk not found"
9618 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9621 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9622 for idx, dev in enumerate(self.instance.disks):
9623 if idx not in self.disks:
9626 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9629 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9631 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9632 " replace disks for instance %s" %
9633 (node_name, self.instance.name))
9635 def _CreateNewStorage(self, node_name):
9636 """Create new storage on the primary or secondary node.
9638 This is only used for same-node replaces, not for changing the
9639 secondary node, hence we don't want to modify the existing disk.
9644 for idx, dev in enumerate(self.instance.disks):
9645 if idx not in self.disks:
9648 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9650 self.cfg.SetDiskID(dev, node_name)
9652 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9653 names = _GenerateUniqueNames(self.lu, lv_names)
9655 vg_data = dev.children[0].logical_id[0]
9656 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9657 logical_id=(vg_data, names[0]))
9658 vg_meta = dev.children[1].logical_id[0]
9659 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9660 logical_id=(vg_meta, names[1]))
9662 new_lvs = [lv_data, lv_meta]
9663 old_lvs = [child.Copy() for child in dev.children]
9664 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9666 # we pass force_create=True to force the LVM creation
9667 for new_lv in new_lvs:
9668 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9669 _GetInstanceInfoText(self.instance), False)
9673 def _CheckDevices(self, node_name, iv_names):
9674 for name, (dev, _, _) in iv_names.iteritems():
9675 self.cfg.SetDiskID(dev, node_name)
9677 result = self.rpc.call_blockdev_find(node_name, dev)
9679 msg = result.fail_msg
9680 if msg or not result.payload:
9682 msg = "disk not found"
9683 raise errors.OpExecError("Can't find DRBD device %s: %s" %
9686 if result.payload.is_degraded:
9687 raise errors.OpExecError("DRBD device %s is degraded!" % name)
9689 def _RemoveOldStorage(self, node_name, iv_names):
9690 for name, (_, old_lvs, _) in iv_names.iteritems():
9691 self.lu.LogInfo("Remove logical volumes for %s" % name)
9694 self.cfg.SetDiskID(lv, node_name)
9696 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9698 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9699 hint="remove unused LVs manually")
9701 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9702 """Replace a disk on the primary or secondary for DRBD 8.
9704 The algorithm for replace is quite complicated:
9706 1. for each disk to be replaced:
9708 1. create new LVs on the target node with unique names
9709 1. detach old LVs from the drbd device
9710 1. rename old LVs to name_replaced.<time_t>
9711 1. rename new LVs to old LVs
9712 1. attach the new LVs (with the old names now) to the drbd device
9714 1. wait for sync across all devices
9716 1. for each modified disk:
9718 1. remove old LVs (which have the name name_replaced.<time_t>)
9720 Failures are not very well handled.
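As a sketch of the detach/rename/attach dance for one data LV (names
illustrative, the real ones carry generated unique prefixes):

  old LV  xenvg/xxx.disk0_data  ->  xenvg/xxx.disk0_data_replaced-<time_t>
  new LV  xenvg/yyy.disk0_data  ->  xenvg/xxx.disk0_data

so the drbd device ends up reattached to fresh LVs carrying the old names.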
9725 # Step: check device activation
9726 self.lu.LogStep(1, steps_total, "Check device existence")
9727 self._CheckDisksExistence([self.other_node, self.target_node])
9728 self._CheckVolumeGroup([self.target_node, self.other_node])
9730 # Step: check other node consistency
9731 self.lu.LogStep(2, steps_total, "Check peer consistency")
9732 self._CheckDisksConsistency(self.other_node,
9733 self.other_node == self.instance.primary_node,
9736 # Step: create new storage
9737 self.lu.LogStep(3, steps_total, "Allocate new storage")
9738 iv_names = self._CreateNewStorage(self.target_node)
9740 # Step: for each lv, detach+rename*2+attach
9741 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9742 for dev, old_lvs, new_lvs in iv_names.itervalues():
9743 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9745 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9747 result.Raise("Can't detach drbd from local storage on node"
9748 " %s for device %s" % (self.target_node, dev.iv_name))
9750 #cfg.Update(instance)
9752 # ok, we created the new LVs, so now we know we have the needed
9753 # storage; as such, we proceed on the target node to rename
9754 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9755 # using the assumption that logical_id == physical_id (which in
9756 # turn is the unique_id on that node)
9758 # FIXME(iustin): use a better name for the replaced LVs
9759 temp_suffix = int(time.time())
9760 ren_fn = lambda d, suff: (d.physical_id[0],
9761 d.physical_id[1] + "_replaced-%s" % suff)
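# e.g. (illustrative) an LV with physical_id ("xenvg", "xxx.disk0_data") and
# temp_suffix 1400000000 is renamed to
# ("xenvg", "xxx.disk0_data_replaced-1400000000")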
9763 # Build the rename list based on what LVs exist on the node
9764 rename_old_to_new = []
9765 for to_ren in old_lvs:
9766 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9767 if not result.fail_msg and result.payload:
9769 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9771 self.lu.LogInfo("Renaming the old LVs on the target node")
9772 result = self.rpc.call_blockdev_rename(self.target_node,
9774 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9776 # Now we rename the new LVs to the old LVs
9777 self.lu.LogInfo("Renaming the new LVs on the target node")
9778 rename_new_to_old = [(new, old.physical_id)
9779 for old, new in zip(old_lvs, new_lvs)]
9780 result = self.rpc.call_blockdev_rename(self.target_node,
9782 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9784 # Intermediate steps of in memory modifications
9785 for old, new in zip(old_lvs, new_lvs):
9786 new.logical_id = old.logical_id
9787 self.cfg.SetDiskID(new, self.target_node)
9789 # We need to modify old_lvs so that removal later removes the
9790 # right LVs, not the newly added ones; note that old_lvs is a
9792 for disk in old_lvs:
9793 disk.logical_id = ren_fn(disk, temp_suffix)
9794 self.cfg.SetDiskID(disk, self.target_node)
9796 # Now that the new lvs have the old name, we can add them to the device
9797 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9798 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9800 msg = result.fail_msg
9802 for new_lv in new_lvs:
9803 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9806 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9807 hint=("cleanup manually the unused logical"
9809 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9812 if self.early_release:
9813 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9815 self._RemoveOldStorage(self.target_node, iv_names)
9816 # WARNING: we release both node locks here, do not do other RPCs
9817 # than WaitForSync to the primary node
9818 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9819 names=[self.target_node, self.other_node])
9822 # This can fail as the old devices are degraded and _WaitForSync
9823 # does a combined result over all disks, so we don't check its return value
9824 self.lu.LogStep(cstep, steps_total, "Sync devices")
9826 _WaitForSync(self.lu, self.instance)
9828 # Check all devices manually
9829 self._CheckDevices(self.instance.primary_node, iv_names)
9831 # Step: remove old storage
9832 if not self.early_release:
9833 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9835 self._RemoveOldStorage(self.target_node, iv_names)
9837 def _ExecDrbd8Secondary(self, feedback_fn):
9838 """Replace the secondary node for DRBD 8.
9840 The algorithm for replace is quite complicated:
9841 - for all disks of the instance:
9842 - create new LVs on the new node with same names
9843 - shutdown the drbd device on the old secondary
9844 - disconnect the drbd network on the primary
9845 - create the drbd device on the new secondary
9846 - network attach the drbd on the primary, using an artifice:
9847 the drbd code for Attach() will connect to the network if it
9848 finds a device which is connected to the good local disks but
9850 - wait for sync across all devices
9851 - remove all disks from the old secondary
9853 Failures are not very well handled.
9858 pnode = self.instance.primary_node
9860 # Step: check device activation
9861 self.lu.LogStep(1, steps_total, "Check device existence")
9862 self._CheckDisksExistence([self.instance.primary_node])
9863 self._CheckVolumeGroup([self.instance.primary_node])
9865 # Step: check other node consistency
9866 self.lu.LogStep(2, steps_total, "Check peer consistency")
9867 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9869 # Step: create new storage
9870 self.lu.LogStep(3, steps_total, "Allocate new storage")
9871 for idx, dev in enumerate(self.instance.disks):
9872 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9873 (self.new_node, idx))
9874 # we pass force_create=True to force LVM creation
9875 for new_lv in dev.children:
9876 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9877 _GetInstanceInfoText(self.instance), False)
9879 # Step 4: drbd minors and drbd setup changes
9880 # after this, we must manually remove the drbd minors on both the
9881 # error and the success paths
9882 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9883 minors = self.cfg.AllocateDRBDMinor([self.new_node
9884 for dev in self.instance.disks],
9886 logging.debug("Allocated minors %r", minors)
9889 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9890 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9891 (self.new_node, idx))
9892 # create new devices on new_node; note that we create two IDs:
9893 # one without port, so the drbd will be activated without
9894 # networking information on the new node at this stage, and one
9895 # with network, for the latter activation in step 4
9896 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9897 if self.instance.primary_node == o_node1:
9900 assert self.instance.primary_node == o_node2, "Three-node instance?"
9903 new_alone_id = (self.instance.primary_node, self.new_node, None,
9904 p_minor, new_minor, o_secret)
9905 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9906 p_minor, new_minor, o_secret)
9908 iv_names[idx] = (dev, dev.children, new_net_id)
9909 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9911 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9912 logical_id=new_alone_id,
9913 children=dev.children,
9916 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9917 _GetInstanceInfoText(self.instance), False)
9918 except errors.GenericError:
9919 self.cfg.ReleaseDRBDMinors(self.instance.name)
9922 # We have new devices, shutdown the drbd on the old secondary
9923 for idx, dev in enumerate(self.instance.disks):
9924 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9925 self.cfg.SetDiskID(dev, self.target_node)
9926 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9928 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9929 "node: %s" % (idx, msg),
9930 hint=("Please cleanup this device manually as"
9931 " soon as possible"))
9933 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9934 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
9935 self.instance.disks)[pnode]
9937 msg = result.fail_msg
9939 # detaches didn't succeed (unlikely)
9940 self.cfg.ReleaseDRBDMinors(self.instance.name)
9941 raise errors.OpExecError("Can't detach the disks from the network on"
9942 " old node: %s" % (msg,))
9944 # if we managed to detach at least one, we update all the disks of
9945 # the instance to point to the new secondary
9946 self.lu.LogInfo("Updating instance configuration")
9947 for dev, _, new_logical_id in iv_names.itervalues():
9948 dev.logical_id = new_logical_id
9949 self.cfg.SetDiskID(dev, self.instance.primary_node)
9951 self.cfg.Update(self.instance, feedback_fn)
9953 # and now perform the drbd attach
9954 self.lu.LogInfo("Attaching primary drbds to new secondary"
9955 " (standalone => connected)")
9956 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9958 self.node_secondary_ip,
9959 self.instance.disks,
9962 for to_node, to_result in result.items():
9963 msg = to_result.fail_msg
9965 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9967 hint=("please do a gnt-instance info to see the"
9968 " status of disks"))
9970 if self.early_release:
9971 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9973 self._RemoveOldStorage(self.target_node, iv_names)
9974 # WARNING: we release all node locks here, do not do other RPCs
9975 # than WaitForSync to the primary node
9976 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9977 names=[self.instance.primary_node,
9982 # This can fail as the old devices are degraded and _WaitForSync
9983 # does a combined result over all disks, so we don't check its return value
9984 self.lu.LogStep(cstep, steps_total, "Sync devices")
9986 _WaitForSync(self.lu, self.instance)
9988 # Check all devices manually
9989 self._CheckDevices(self.instance.primary_node, iv_names)
9991 # Step: remove old storage
9992 if not self.early_release:
9993 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9994 self._RemoveOldStorage(self.target_node, iv_names)
9997 class LURepairNodeStorage(NoHooksLU):
9998 """Repairs the volume group on a node.
10003 def CheckArguments(self):
10004 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10006 storage_type = self.op.storage_type
10008 if (constants.SO_FIX_CONSISTENCY not in
10009 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10010 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10011 " repaired" % storage_type,
10012 errors.ECODE_INVAL)
10014 def ExpandNames(self):
10015 self.needed_locks = {
10016 locking.LEVEL_NODE: [self.op.node_name],
10019 def _CheckFaultyDisks(self, instance, node_name):
10020 """Ensure faulty disks abort the opcode or at least warn."""
10022 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10024 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10025 " node '%s'" % (instance.name, node_name),
10026 errors.ECODE_STATE)
10027 except errors.OpPrereqError, err:
10028 if self.op.ignore_consistency:
10029 self.proc.LogWarning(str(err.args[0]))
10033 def CheckPrereq(self):
10034 """Check prerequisites.
10037 # Check whether any instance on this node has faulty disks
10038 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10039 if not inst.admin_up:
10041 check_nodes = set(inst.all_nodes)
10042 check_nodes.discard(self.op.node_name)
10043 for inst_node_name in check_nodes:
10044 self._CheckFaultyDisks(inst, inst_node_name)
10046 def Exec(self, feedback_fn):
10047 feedback_fn("Repairing storage unit '%s' on %s ..." %
10048 (self.op.name, self.op.node_name))
10050 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10051 result = self.rpc.call_storage_execute(self.op.node_name,
10052 self.op.storage_type, st_args,
10054 constants.SO_FIX_CONSISTENCY)
10055 result.Raise("Failed to repair storage unit '%s' on %s" %
10056 (self.op.name, self.op.node_name))
10059 class LUNodeEvacuate(NoHooksLU):
10060 """Evacuates instances off a list of nodes.
10065 def CheckArguments(self):
10066 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10068 def ExpandNames(self):
10069 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10071 if self.op.remote_node is not None:
10072 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10073 assert self.op.remote_node
10075 if self.op.remote_node == self.op.node_name:
10076 raise errors.OpPrereqError("Can not use evacuated node as a new"
10077 " secondary node", errors.ECODE_INVAL)
10079 if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10080 raise errors.OpPrereqError("Without the use of an iallocator only"
10081 " secondary instances can be evacuated",
10082 errors.ECODE_INVAL)
10085 self.share_locks = _ShareAll()
10086 self.needed_locks = {
10087 locking.LEVEL_INSTANCE: [],
10088 locking.LEVEL_NODEGROUP: [],
10089 locking.LEVEL_NODE: [],
10092 if self.op.remote_node is None:
10093 # Iallocator will choose any node(s) in the same group
10094 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10096 group_nodes = frozenset([self.op.remote_node])
10098 # Determine nodes to be locked
10099 self.lock_nodes = set([self.op.node_name]) | group_nodes
10101 def _DetermineInstances(self):
10102 """Builds list of instances to operate on.
10105 assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10107 if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10108 # Primary instances only
10109 inst_fn = _GetNodePrimaryInstances
10110 assert self.op.remote_node is None, \
10111 "Evacuating primary instances requires iallocator"
10112 elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10113 # Secondary instances only
10114 inst_fn = _GetNodeSecondaryInstances
10117 assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10118 inst_fn = _GetNodeInstances
10120 return inst_fn(self.cfg, self.op.node_name)
10122 def DeclareLocks(self, level):
10123 if level == locking.LEVEL_INSTANCE:
10124 # Lock instances optimistically, needs verification once node and group
10125 # locks have been acquired
10126 self.needed_locks[locking.LEVEL_INSTANCE] = \
10127 set(i.name for i in self._DetermineInstances())
10129 elif level == locking.LEVEL_NODEGROUP:
10130 # Lock node groups optimistically, needs verification once nodes have
10132 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10133 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10135 elif level == locking.LEVEL_NODE:
10136 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
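# Note: the instance and group locks declared above are optimistic; they are
# re-verified in CheckPrereq below and the opcode aborts if the sets changed
# between declaration and acquisition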
10138 def CheckPrereq(self):
10140 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10141 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10142 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10144 assert owned_nodes == self.lock_nodes
10146 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10147 if owned_groups != wanted_groups:
10148 raise errors.OpExecError("Node groups changed since locks were acquired,"
10149 " current groups are '%s', used to be '%s'" %
10150 (utils.CommaJoin(wanted_groups),
10151 utils.CommaJoin(owned_groups)))
10153 # Determine affected instances
10154 self.instances = self._DetermineInstances()
10155 self.instance_names = [i.name for i in self.instances]
10157 if set(self.instance_names) != owned_instances:
10158 raise errors.OpExecError("Instances on node '%s' changed since locks"
10159 " were acquired, current instances are '%s',"
10160 " used to be '%s'" %
10161 (self.op.node_name,
10162 utils.CommaJoin(self.instance_names),
10163 utils.CommaJoin(owned_instances)))
10165 if self.instance_names:
10166 self.LogInfo("Evacuating instances from node '%s': %s",
10168 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10170 self.LogInfo("No instances to evacuate from node '%s'",
10173 if self.op.remote_node is not None:
10174 for i in self.instances:
10175 if i.primary_node == self.op.remote_node:
10176 raise errors.OpPrereqError("Node %s is the primary node of"
10177 " instance %s, cannot use it as"
10179 (self.op.remote_node, i.name),
10180 errors.ECODE_INVAL)
10182 def Exec(self, feedback_fn):
10183 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10185 if not self.instance_names:
10186 # No instances to evacuate
10189 elif self.op.iallocator is not None:
10190 # TODO: Implement relocation to other group
10191 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10192 evac_mode=self.op.mode,
10193 instances=list(self.instance_names))
10195 ial.Run(self.op.iallocator)
10197 if not ial.success:
10198 raise errors.OpPrereqError("Can't compute node evacuation using"
10199 " iallocator '%s': %s" %
10200 (self.op.iallocator, ial.info),
10201 errors.ECODE_NORES)
10203 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10205 elif self.op.remote_node is not None:
10206 assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10208 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10209 remote_node=self.op.remote_node,
10211 mode=constants.REPLACE_DISK_CHG,
10212 early_release=self.op.early_release)]
10213 for instance_name in self.instance_names
10217 raise errors.ProgrammerError("No iallocator or remote node")
10219 return ResultWithJobs(jobs)
10222 def _SetOpEarlyRelease(early_release, op):
10223 """Sets C{early_release} flag on opcodes if available.
10227 op.early_release = early_release
10228 except AttributeError:
10229 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10234 def _NodeEvacDest(use_nodes, group, nodes):
10235 """Returns group or nodes depending on caller's choice.
10239 return utils.CommaJoin(nodes)
10244 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10245 """Unpacks the result of change-group and node-evacuate iallocator requests.
10247 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10248 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10250 @type lu: L{LogicalUnit}
10251 @param lu: Logical unit instance
10252 @type alloc_result: tuple/list
10253 @param alloc_result: Result from iallocator
10254 @type early_release: bool
10255 @param early_release: Whether to release locks early if possible
10256 @type use_nodes: bool
10257 @param use_nodes: Whether to display node names instead of groups
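A sketch of C{alloc_result}, mirroring the unpacking below:
C{(moved, failed, jobs)}, where C{moved} is a list of
C{(instance name, group name, [node name, ...])} tuples, C{failed} a list
of C{(instance name, reason)} tuples and C{jobs} a list of lists of
serialized opcode dicts.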
10260 (moved, failed, jobs) = alloc_result
10263 lu.LogWarning("Unable to evacuate instances %s",
10264 utils.CommaJoin("%s (%s)" % (name, reason)
10265 for (name, reason) in failed))
10268 lu.LogInfo("Instances to be moved: %s",
10269 utils.CommaJoin("%s (to %s)" %
10270 (name, _NodeEvacDest(use_nodes, group, nodes))
10271 for (name, group, nodes) in moved))
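# Deserialize each job's opcodes and propagate the early_release flag to
# those opcode types that support it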
10273 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10274 map(opcodes.OpCode.LoadOpCode, ops))
10278 class LUInstanceGrowDisk(LogicalUnit):
10279 """Grow a disk of an instance.
10282 HPATH = "disk-grow"
10283 HTYPE = constants.HTYPE_INSTANCE
10286 def ExpandNames(self):
10287 self._ExpandAndLockInstance()
10288 self.needed_locks[locking.LEVEL_NODE] = []
10289 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10291 def DeclareLocks(self, level):
10292 if level == locking.LEVEL_NODE:
10293 self._LockInstancesNodes()
10295 def BuildHooksEnv(self):
10296 """Build hooks env.
10298 This runs on the master, the primary and all the secondaries.
10302 "DISK": self.op.disk,
10303 "AMOUNT": self.op.amount,
10305 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10308 def BuildHooksNodes(self):
10309 """Build hooks nodes.
10312 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10315 def CheckPrereq(self):
10316 """Check prerequisites.
10318 This checks that the instance is in the cluster.
10321 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10322 assert instance is not None, \
10323 "Cannot retrieve locked instance %s" % self.op.instance_name
10324 nodenames = list(instance.all_nodes)
10325 for node in nodenames:
10326 _CheckNodeOnline(self, node)
10328 self.instance = instance
10330 if instance.disk_template not in constants.DTS_GROWABLE:
10331 raise errors.OpPrereqError("Instance's disk layout does not support"
10332 " growing", errors.ECODE_INVAL)
10334 self.disk = instance.FindDisk(self.op.disk)
10336 if instance.disk_template not in (constants.DT_FILE,
10337 constants.DT_SHARED_FILE):
10338 # TODO: check the free disk space for file, when that feature will be
10340 _CheckNodesFreeDiskPerVG(self, nodenames,
10341 self.disk.ComputeGrowth(self.op.amount))
10343 def Exec(self, feedback_fn):
10344 """Execute disk grow.
10347 instance = self.instance
10350 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10352 raise errors.OpExecError("Cannot activate block device to grow")
10354 # First run all grow ops in dry-run mode
10355 for node in instance.all_nodes:
10356 self.cfg.SetDiskID(disk, node)
10357 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10358 result.Raise("Grow request failed to node %s" % node)
10360 # We know that (as far as we can test) operations across different
10361 # nodes will succeed, time to run it for real
10362 for node in instance.all_nodes:
10363 self.cfg.SetDiskID(disk, node)
10364 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10365 result.Raise("Grow request failed to node %s" % node)
10367 # TODO: Rewrite code to work properly
10368 # DRBD goes into sync mode for a short amount of time after executing the
10369 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10370 # calling "resize" in sync mode fails. Sleeping for a short amount of
10371 # time is a work-around.
10374 disk.RecordGrow(self.op.amount)
10375 self.cfg.Update(instance, feedback_fn)
10376 if self.op.wait_for_sync:
10377 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10379 self.proc.LogWarning("Disk sync-ing has not returned a good"
10380 " status; please check the instance")
10381 if not instance.admin_up:
10382 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10383 elif not instance.admin_up:
10384 self.proc.LogWarning("Not shutting down the disk even if the instance is"
10385 " not supposed to be running because no wait for"
10386 " sync mode was requested")
10389 class LUInstanceQueryData(NoHooksLU):
10390 """Query runtime instance data.
10395 def ExpandNames(self):
10396 self.needed_locks = {}
10398 # Use locking if requested or when non-static information is wanted
10399 if not (self.op.static or self.op.use_locking):
10400 self.LogWarning("Non-static data requested, locks need to be acquired")
10401 self.op.use_locking = True
10403 if self.op.instances or not self.op.use_locking:
10404 # Expand instance names right here
10405 self.wanted_names = _GetWantedInstances(self, self.op.instances)
10407 # Will use acquired locks
10408 self.wanted_names = None
10410 if self.op.use_locking:
10411 self.share_locks = _ShareAll()
10413 if self.wanted_names is None:
10414 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10416 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10418 self.needed_locks[locking.LEVEL_NODE] = []
10419 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10421 def DeclareLocks(self, level):
10422 if self.op.use_locking and level == locking.LEVEL_NODE:
10423 self._LockInstancesNodes()
10425 def CheckPrereq(self):
10426 """Check prerequisites.
10428 This only checks the optional instance list against the existing names.
10431 if self.wanted_names is None:
10432 assert self.op.use_locking, "Locking was not used"
10433 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10435 self.wanted_instances = \
10436 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10438 def _ComputeBlockdevStatus(self, node, instance_name, dev):
10439 """Returns the status of a block device
10442 if self.op.static or not node:
10445 self.cfg.SetDiskID(dev, node)
10447 result = self.rpc.call_blockdev_find(node, dev)
10451 result.Raise("Can't compute disk status for %s" % instance_name)
10453 status = result.payload
10457 return (status.dev_path, status.major, status.minor,
10458 status.sync_percent, status.estimated_time,
10459 status.is_degraded, status.ldisk_status)
10461 def _ComputeDiskStatus(self, instance, snode, dev):
10462 """Compute block device status.
10465 if dev.dev_type in constants.LDS_DRBD:
10466 # we change the snode then (otherwise we use the one passed in)
10467 if dev.logical_id[0] == instance.primary_node:
10468 snode = dev.logical_id[1]
10470 snode = dev.logical_id[0]
10472 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10473 instance.name, dev)
10474 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10477 dev_children = map(compat.partial(self._ComputeDiskStatus,
10484 "iv_name": dev.iv_name,
10485 "dev_type": dev.dev_type,
10486 "logical_id": dev.logical_id,
10487 "physical_id": dev.physical_id,
10488 "pstatus": dev_pstatus,
10489 "sstatus": dev_sstatus,
10490 "children": dev_children,
10495 def Exec(self, feedback_fn):
10496 """Gather and return data"""
10499 cluster = self.cfg.GetClusterInfo()
10501 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10502 for i in self.wanted_instances)
10503 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10504 if self.op.static or pnode.offline:
10505 remote_state = None
10507 self.LogWarning("Primary node %s is marked offline, returning static"
10508 " information only for instance %s" %
10509 (pnode.name, instance.name))
10511 remote_info = self.rpc.call_instance_info(instance.primary_node,
10513 instance.hypervisor)
10514 remote_info.Raise("Error checking node %s" % instance.primary_node)
10515 remote_info = remote_info.payload
10516 if remote_info and "state" in remote_info:
10517 remote_state = "up"
10519 remote_state = "down"
10521 if instance.admin_up:
10522 config_state = "up"
10524 config_state = "down"
10526 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10529 result[instance.name] = {
10530 "name": instance.name,
10531 "config_state": config_state,
10532 "run_state": remote_state,
10533 "pnode": instance.primary_node,
10534 "snodes": instance.secondary_nodes,
10536 # this happens to be the same format used for hooks
10537 "nics": _NICListToTuple(self, instance.nics),
10538 "disk_template": instance.disk_template,
10540 "hypervisor": instance.hypervisor,
10541 "network_port": instance.network_port,
10542 "hv_instance": instance.hvparams,
10543 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10544 "be_instance": instance.beparams,
10545 "be_actual": cluster.FillBE(instance),
10546 "os_instance": instance.osparams,
10547 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10548 "serial_no": instance.serial_no,
10549 "mtime": instance.mtime,
10550 "ctime": instance.ctime,
10551 "uuid": instance.uuid,
10557 class LUInstanceSetParams(LogicalUnit):
10558 """Modifies an instances's parameters.
10561 HPATH = "instance-modify"
10562 HTYPE = constants.HTYPE_INSTANCE
10565 def CheckArguments(self):
10566 if not (self.op.nics or self.op.disks or self.op.disk_template or
10567 self.op.hvparams or self.op.beparams or self.op.os_name):
10568 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10570 if self.op.hvparams:
10571 _CheckGlobalHvParams(self.op.hvparams)
10575 for disk_op, disk_dict in self.op.disks:
10576 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10577 if disk_op == constants.DDM_REMOVE:
10578 disk_addremove += 1
10580 elif disk_op == constants.DDM_ADD:
10581 disk_addremove += 1
10583 if not isinstance(disk_op, int):
10584 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10585 if not isinstance(disk_dict, dict):
10586 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10587 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10589 if disk_op == constants.DDM_ADD:
10590 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10591 if mode not in constants.DISK_ACCESS_SET:
10592 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10593 errors.ECODE_INVAL)
10594 size = disk_dict.get(constants.IDISK_SIZE, None)
10596 raise errors.OpPrereqError("Required disk parameter size missing",
10597 errors.ECODE_INVAL)
10600 except (TypeError, ValueError), err:
10601 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10602 str(err), errors.ECODE_INVAL)
10603 disk_dict[constants.IDISK_SIZE] = size
10605 # modification of disk
10606 if constants.IDISK_SIZE in disk_dict:
10607 raise errors.OpPrereqError("Disk size change not possible, use"
10608 " grow-disk", errors.ECODE_INVAL)
10610 if disk_addremove > 1:
10611 raise errors.OpPrereqError("Only one disk add or remove operation"
10612 " supported at a time", errors.ECODE_INVAL)
10614 if self.op.disks and self.op.disk_template is not None:
10615 raise errors.OpPrereqError("Disk template conversion and other disk"
10616 " changes not supported at the same time",
10617 errors.ECODE_INVAL)
10619 if (self.op.disk_template and
10620 self.op.disk_template in constants.DTS_INT_MIRROR and
10621 self.op.remote_node is None):
10622 raise errors.OpPrereqError("Changing the disk template to a mirrored"
10623 " one requires specifying a secondary node",
10624 errors.ECODE_INVAL)
10628 for nic_op, nic_dict in self.op.nics:
10629 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10630 if nic_op == constants.DDM_REMOVE:
10633 elif nic_op == constants.DDM_ADD:
10636 if not isinstance(nic_op, int):
10637 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10638 if not isinstance(nic_dict, dict):
10639 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10640 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10642 # nic_dict should be a dict
10643 nic_ip = nic_dict.get(constants.INIC_IP, None)
10644 if nic_ip is not None:
10645 if nic_ip.lower() == constants.VALUE_NONE:
10646 nic_dict[constants.INIC_IP] = None
10648 if not netutils.IPAddress.IsValid(nic_ip):
10649 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10650 errors.ECODE_INVAL)
10652 nic_bridge = nic_dict.get("bridge", None)
10653 nic_link = nic_dict.get(constants.INIC_LINK, None)
10654 if nic_bridge and nic_link:
10655 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10656 " at the same time", errors.ECODE_INVAL)
10657 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10658 nic_dict["bridge"] = None
10659 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10660 nic_dict[constants.INIC_LINK] = None
10662 if nic_op == constants.DDM_ADD:
10663 nic_mac = nic_dict.get(constants.INIC_MAC, None)
10664 if nic_mac is None:
10665 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10667 if constants.INIC_MAC in nic_dict:
10668 nic_mac = nic_dict[constants.INIC_MAC]
10669 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10670 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10672 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10673 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10674 " modifying an existing nic",
10675 errors.ECODE_INVAL)
10677 if nic_addremove > 1:
10678 raise errors.OpPrereqError("Only one NIC add or remove operation"
10679 " supported at a time", errors.ECODE_INVAL)
10681 def ExpandNames(self):
10682 self._ExpandAndLockInstance()
10683 self.needed_locks[locking.LEVEL_NODE] = []
10684 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10686 def DeclareLocks(self, level):
10687 if level == locking.LEVEL_NODE:
10688 self._LockInstancesNodes()
10689 if self.op.disk_template and self.op.remote_node:
10690 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10691 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10693 def BuildHooksEnv(self):
10694 """Build hooks env.
10696 This runs on the master, primary and secondaries.
10700 if constants.BE_MEMORY in self.be_new:
10701 args["memory"] = self.be_new[constants.BE_MEMORY]
10702 if constants.BE_VCPUS in self.be_new:
10703 args["vcpus"] = self.be_new[constants.BE_VCPUS]
10704 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10705 # information at all.
10708 nic_override = dict(self.op.nics)
10709 for idx, nic in enumerate(self.instance.nics):
10710 if idx in nic_override:
10711 this_nic_override = nic_override[idx]
10713 this_nic_override = {}
10714 if constants.INIC_IP in this_nic_override:
10715 ip = this_nic_override[constants.INIC_IP]
10718 if constants.INIC_MAC in this_nic_override:
10719 mac = this_nic_override[constants.INIC_MAC]
10722 if idx in self.nic_pnew:
10723 nicparams = self.nic_pnew[idx]
10725 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10726 mode = nicparams[constants.NIC_MODE]
10727 link = nicparams[constants.NIC_LINK]
10728 args["nics"].append((ip, mac, mode, link))
10729 if constants.DDM_ADD in nic_override:
10730 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10731 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10732 nicparams = self.nic_pnew[constants.DDM_ADD]
10733 mode = nicparams[constants.NIC_MODE]
10734 link = nicparams[constants.NIC_LINK]
10735 args["nics"].append((ip, mac, mode, link))
10736 elif constants.DDM_REMOVE in nic_override:
10737 del args["nics"][-1]
10739 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10740 if self.op.disk_template:
10741 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10745 def BuildHooksNodes(self):
10746 """Build hooks nodes.
10749 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10752 def CheckPrereq(self):
10753 """Check prerequisites.
10755 This only checks the instance list against the existing names.
10758 # checking the new params on the primary/secondary nodes
10760 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10761 cluster = self.cluster = self.cfg.GetClusterInfo()
10762 assert self.instance is not None, \
10763 "Cannot retrieve locked instance %s" % self.op.instance_name
10764 pnode = instance.primary_node
10765 nodelist = list(instance.all_nodes)
10768 if self.op.os_name and not self.op.force:
10769 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10770 self.op.force_variant)
10771 instance_os = self.op.os_name
10773 instance_os = instance.os
10775 if self.op.disk_template:
10776 if instance.disk_template == self.op.disk_template:
10777 raise errors.OpPrereqError("Instance already has disk template %s" %
10778 instance.disk_template, errors.ECODE_INVAL)
10780 if (instance.disk_template,
10781 self.op.disk_template) not in self._DISK_CONVERSIONS:
10782 raise errors.OpPrereqError("Unsupported disk template conversion from"
10783 " %s to %s" % (instance.disk_template,
10784 self.op.disk_template),
10785 errors.ECODE_INVAL)
10786 _CheckInstanceDown(self, instance, "cannot change disk template")
10787 if self.op.disk_template in constants.DTS_INT_MIRROR:
10788 if self.op.remote_node == pnode:
10789 raise errors.OpPrereqError("Given new secondary node %s is the same"
10790 " as the primary node of the instance" %
10791 self.op.remote_node, errors.ECODE_STATE)
10792 _CheckNodeOnline(self, self.op.remote_node)
10793 _CheckNodeNotDrained(self, self.op.remote_node)
10794 # FIXME: here we assume that the old instance type is DT_PLAIN
10795 assert instance.disk_template == constants.DT_PLAIN
10796 disks = [{constants.IDISK_SIZE: d.size,
10797 constants.IDISK_VG: d.logical_id[0]}
10798 for d in instance.disks]
10799 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10800 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10802 # hvparams processing
10803 if self.op.hvparams:
10804 hv_type = instance.hypervisor
10805 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10806 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10807 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10810 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10811 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10812 self.hv_new = hv_new # the new actual values
10813 self.hv_inst = i_hvdict # the new dict (without defaults)
10815 self.hv_new = self.hv_inst = {}
10817 # beparams processing
10818 if self.op.beparams:
10819 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10821 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10822 be_new = cluster.SimpleFillBE(i_bedict)
10823 self.be_new = be_new # the new actual values
10824 self.be_inst = i_bedict # the new dict (without defaults)
10826 self.be_new = self.be_inst = {}
10827 be_old = cluster.FillBE(instance)
10829 # osparams processing
10830 if self.op.osparams:
10831 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10832 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10833 self.os_inst = i_osdict # the new dict (without defaults)
10839 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10840 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10841 mem_check_list = [pnode]
10842 if be_new[constants.BE_AUTO_BALANCE]:
10843 # either we changed auto_balance to yes or it was from before
10844 mem_check_list.extend(instance.secondary_nodes)
10845 instance_info = self.rpc.call_instance_info(pnode, instance.name,
10846 instance.hypervisor)
10847 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10848 instance.hypervisor)
10849 pninfo = nodeinfo[pnode]
10850 msg = pninfo.fail_msg
10852 # Assume the primary node is unreachable and go ahead
10853 self.warn.append("Can't get info from primary node %s: %s" %
10855 elif not isinstance(pninfo.payload.get("memory_free", None), int):
10856 self.warn.append("Node data from primary node %s doesn't contain"
10857 " free memory information" % pnode)
10858 elif instance_info.fail_msg:
10859 self.warn.append("Can't get instance runtime information: %s" %
10860 instance_info.fail_msg)
10862 if instance_info.payload:
10863 current_mem = int(instance_info.payload["memory"])
10865 # Assume instance not running
10866 # (there is a slight race condition here, but it's not very probable,
10867 # and we have no other way to check)
10869 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10870 pninfo.payload["memory_free"])
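# e.g. (illustrative) raising BE_MEMORY to 4096 MB while the instance
# currently uses 1024 MB and the node reports 2048 MB free leaves
# miss_mem = 4096 - 1024 - 2048 = 1024 MB unaccounted for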
10872 raise errors.OpPrereqError("This change will prevent the instance"
10873 " from starting, due to %d MB of memory"
10874 " missing on its primary node" % miss_mem,
10875 errors.ECODE_NORES)
10877 if be_new[constants.BE_AUTO_BALANCE]:
10878 for node, nres in nodeinfo.items():
10879 if node not in instance.secondary_nodes:
10881 nres.Raise("Can't get info from secondary node %s" % node,
10882 prereq=True, ecode=errors.ECODE_STATE)
10883 if not isinstance(nres.payload.get("memory_free", None), int):
10884 raise errors.OpPrereqError("Secondary node %s didn't return free"
10885 " memory information" % node,
10886 errors.ECODE_STATE)
10887 elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10888 raise errors.OpPrereqError("This change will prevent the instance"
10889 " from failover to its secondary node"
10890 " %s, due to not enough memory" % node,
10891 errors.ECODE_STATE)
10895 self.nic_pinst = {}
10896 for nic_op, nic_dict in self.op.nics:
10897 if nic_op == constants.DDM_REMOVE:
10898 if not instance.nics:
10899 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10900 errors.ECODE_INVAL)
10902 if nic_op != constants.DDM_ADD:
10904 if not instance.nics:
10905 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10906 " no NICs" % nic_op,
10907 errors.ECODE_INVAL)
10908 if nic_op < 0 or nic_op >= len(instance.nics):
10909 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10911 (nic_op, len(instance.nics) - 1),
10912 errors.ECODE_INVAL)
10913 old_nic_params = instance.nics[nic_op].nicparams
10914 old_nic_ip = instance.nics[nic_op].ip
10916 old_nic_params = {}
10919 update_params_dict = dict([(key, nic_dict[key])
10920 for key in constants.NICS_PARAMETERS
10921 if key in nic_dict])
10923 if "bridge" in nic_dict:
10924 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10926 new_nic_params = _GetUpdatedParams(old_nic_params,
10927 update_params_dict)
10928 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10929 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10930 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10931 self.nic_pinst[nic_op] = new_nic_params
10932 self.nic_pnew[nic_op] = new_filled_nic_params
10933 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10935 if new_nic_mode == constants.NIC_MODE_BRIDGED:
10936 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10937 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10939 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10941 self.warn.append(msg)
10943 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10944 if new_nic_mode == constants.NIC_MODE_ROUTED:
10945 if constants.INIC_IP in nic_dict:
10946 nic_ip = nic_dict[constants.INIC_IP]
10948 nic_ip = old_nic_ip
10950 raise errors.OpPrereqError("Cannot set the nic ip to None"
10951 " on a routed nic", errors.ECODE_INVAL)
10952 if constants.INIC_MAC in nic_dict:
10953 nic_mac = nic_dict[constants.INIC_MAC]
10954 if nic_mac is None:
10955 raise errors.OpPrereqError("Cannot set the nic mac to None",
10956 errors.ECODE_INVAL)
10957 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10958 # otherwise generate the mac
10959 nic_dict[constants.INIC_MAC] = \
10960 self.cfg.GenerateMAC(self.proc.GetECId())
10962 # or validate/reserve the current one
10964 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10965 except errors.ReservationError:
10966 raise errors.OpPrereqError("MAC address %s already in use"
10967 " in cluster" % nic_mac,
10968 errors.ECODE_NOTUNIQUE)
10971 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10972 raise errors.OpPrereqError("Disk operations not supported for"
10973 " diskless instances",
10974 errors.ECODE_INVAL)
10975 for disk_op, _ in self.op.disks:
10976 if disk_op == constants.DDM_REMOVE:
10977 if len(instance.disks) == 1:
10978 raise errors.OpPrereqError("Cannot remove the last disk of"
10979 " an instance", errors.ECODE_INVAL)
10980 _CheckInstanceDown(self, instance, "cannot remove disks")
10982 if (disk_op == constants.DDM_ADD and
10983 len(instance.disks) >= constants.MAX_DISKS):
10984 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10985 " add more" % constants.MAX_DISKS,
10986 errors.ECODE_STATE)
10987 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10989 if disk_op < 0 or disk_op >= len(instance.disks):
10990 raise errors.OpPrereqError("Invalid disk index %s, valid values"
10992 (disk_op, len(instance.disks)),
10993 errors.ECODE_INVAL)
10997 def _ConvertPlainToDrbd(self, feedback_fn):
10998 """Converts an instance from plain to drbd.
11001 feedback_fn("Converting template to drbd")
11002 instance = self.instance
11003 pnode = instance.primary_node
11004 snode = self.op.remote_node
11006 # create a fake disk info for _GenerateDiskTemplate
11007 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11008 constants.IDISK_VG: d.logical_id[0]}
11009 for d in instance.disks]
11010 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11011 instance.name, pnode, [snode],
11012 disk_info, None, None, 0, feedback_fn)
11013 info = _GetInstanceInfoText(instance)
11014 feedback_fn("Creating aditional volumes...")
11015 # first, create the missing data and meta devices
11016 for disk in new_disks:
11017 # unfortunately this is... not too nice
11018 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11020 for child in disk.children:
11021 _CreateSingleBlockDev(self, snode, instance, child, info, True)
11022 # at this stage, all new LVs have been created, we can rename the
11024 feedback_fn("Renaming original volumes...")
11025 rename_list = [(o, n.children[0].logical_id)
11026 for (o, n) in zip(instance.disks, new_disks)]
11027 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11028 result.Raise("Failed to rename original LVs")
11030 feedback_fn("Initializing DRBD devices...")
11031 # all child devices are in place, we can now create the DRBD devices
11032 for disk in new_disks:
11033 for node in [pnode, snode]:
11034 f_create = node == pnode
11035 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11037 # at this point, the instance has been modified
11038 instance.disk_template = constants.DT_DRBD8
11039 instance.disks = new_disks
11040 self.cfg.Update(instance, feedback_fn)
11042 # disks are created, waiting for sync
11043 disk_abort = not _WaitForSync(self, instance,
11044 oneshot=not self.op.wait_for_sync)
11046 raise errors.OpExecError("There are some degraded disks for"
11047 " this instance, please cleanup manually")
11049 def _ConvertDrbdToPlain(self, feedback_fn):
11050 """Converts an instance from drbd to plain.
11053 instance = self.instance
11054 assert len(instance.secondary_nodes) == 1
11055 pnode = instance.primary_node
11056 snode = instance.secondary_nodes[0]
11057 feedback_fn("Converting template to plain")
11059 old_disks = instance.disks
11060 new_disks = [d.children[0] for d in old_disks]
11062 # copy over size and mode
11063 for parent, child in zip(old_disks, new_disks):
11064 child.size = parent.size
11065 child.mode = parent.mode
11067 # update instance structure
11068 instance.disks = new_disks
11069 instance.disk_template = constants.DT_PLAIN
11070 self.cfg.Update(instance, feedback_fn)
11072 feedback_fn("Removing volumes on the secondary node...")
11073 for disk in old_disks:
11074 self.cfg.SetDiskID(disk, snode)
11075 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11076 if msg:
11077 self.LogWarning("Could not remove block device %s on node %s,"
11078 " continuing anyway: %s", disk.iv_name, snode, msg)
11080 feedback_fn("Removing unneeded volumes on the primary node...")
11081 for idx, disk in enumerate(old_disks):
11082 meta = disk.children[1]
11083 self.cfg.SetDiskID(meta, pnode)
11084 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11085 if msg:
11086 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11087 " continuing anyway: %s", idx, pnode, msg)
11089 def Exec(self, feedback_fn):
11090 """Modifies an instance.
11092 All parameters take effect only at the next restart of the instance.
11094 """
11095 # Process here the warnings from CheckPrereq, as we don't have a
11096 # feedback_fn there.
11097 for warn in self.warn:
11098 feedback_fn("WARNING: %s" % warn)
11100 result = []
11101 instance = self.instance
11103 for disk_op, disk_dict in self.op.disks:
11104 if disk_op == constants.DDM_REMOVE:
11105 # remove the last disk
11106 device = instance.disks.pop()
11107 device_idx = len(instance.disks)
11108 for node, disk in device.ComputeNodeTree(instance.primary_node):
11109 self.cfg.SetDiskID(disk, node)
11110 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11111 if msg:
11112 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11113 " continuing anyway", device_idx, node, msg)
11114 result.append(("disk/%d" % device_idx, "remove"))
11115 elif disk_op == constants.DDM_ADD:
11116 # add a new disk
11117 if instance.disk_template in (constants.DT_FILE,
11118 constants.DT_SHARED_FILE):
11119 file_driver, file_path = instance.disks[0].logical_id
11120 file_path = os.path.dirname(file_path)
11121 else:
11122 file_driver = file_path = None
11123 disk_idx_base = len(instance.disks)
11124 new_disk = _GenerateDiskTemplate(self,
11125 instance.disk_template,
11126 instance.name, instance.primary_node,
11127 instance.secondary_nodes,
11128 [disk_dict],
11129 file_path,
11130 file_driver,
11131 disk_idx_base, feedback_fn)[0]
11132 instance.disks.append(new_disk)
11133 info = _GetInstanceInfoText(instance)
11135 logging.info("Creating volume %s for instance %s",
11136 new_disk.iv_name, instance.name)
11137 # Note: this needs to be kept in sync with _CreateDisks
11139 for node in instance.all_nodes:
11140 f_create = node == instance.primary_node
11141 try:
11142 _CreateBlockDev(self, node, instance, new_disk,
11143 f_create, info, f_create)
11144 except errors.OpExecError, err:
11145 self.LogWarning("Failed to create volume %s (%s) on"
11146 " node %s: %s",
11147 new_disk.iv_name, new_disk, node, err)
11148 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11149 (new_disk.size, new_disk.mode)))
11150 else:
11151 # change a given disk
11152 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11153 result.append(("disk.mode/%d" % disk_op,
11154 disk_dict[constants.IDISK_MODE]))
11156 if self.op.disk_template:
11157 r_shut = _ShutdownInstanceDisks(self, instance)
11158 if not r_shut:
11159 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11160 " proceed with disk template conversion")
11161 mode = (instance.disk_template, self.op.disk_template)
11162 try:
11163 self._DISK_CONVERSIONS[mode](self, feedback_fn)
11164 finally:
11165 self.cfg.ReleaseDRBDMinors(instance.name)
11167 result.append(("disk_template", self.op.disk_template))
11169 # NIC changes
11170 for nic_op, nic_dict in self.op.nics:
11171 if nic_op == constants.DDM_REMOVE:
11172 # remove the last nic
11173 del instance.nics[-1]
11174 result.append(("nic.%d" % len(instance.nics), "remove"))
11175 elif nic_op == constants.DDM_ADD:
11176 # mac and bridge should be set, by now
11177 mac = nic_dict[constants.INIC_MAC]
11178 ip = nic_dict.get(constants.INIC_IP, None)
11179 nicparams = self.nic_pinst[constants.DDM_ADD]
11180 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11181 instance.nics.append(new_nic)
11182 result.append(("nic.%d" % (len(instance.nics) - 1),
11183 "add:mac=%s,ip=%s,mode=%s,link=%s" %
11184 (new_nic.mac, new_nic.ip,
11185 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11186 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11187 )))
11188 else:
11189 for key in (constants.INIC_MAC, constants.INIC_IP):
11190 if key in nic_dict:
11191 setattr(instance.nics[nic_op], key, nic_dict[key])
11192 if nic_op in self.nic_pinst:
11193 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11194 for key, val in nic_dict.iteritems():
11195 result.append(("nic.%s/%d" % (key, nic_op), val))
11197 # hvparams changes
11198 if self.op.hvparams:
11199 instance.hvparams = self.hv_inst
11200 for key, val in self.op.hvparams.iteritems():
11201 result.append(("hv/%s" % key, val))
11203 # beparams changes
11204 if self.op.beparams:
11205 instance.beparams = self.be_inst
11206 for key, val in self.op.beparams.iteritems():
11207 result.append(("be/%s" % key, val))
11209 # OS change
11210 if self.op.os_name:
11211 instance.os = self.op.os_name
11213 # osparams changes
11214 if self.op.osparams:
11215 instance.osparams = self.os_inst
11216 for key, val in self.op.osparams.iteritems():
11217 result.append(("os/%s" % key, val))
11219 self.cfg.Update(instance, feedback_fn)
11221 return result
11223 _DISK_CONVERSIONS = {
11224 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11225 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11226 }
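# _DISK_CONVERSIONS is the dispatch table used by Exec() above: only the two
# (old_template, new_template) pairs listed here can be converted in place.
# A minimal sketch of the lookup, mirroring the call in Exec (illustrative):
#
#   mode = (constants.DT_PLAIN, constants.DT_DRBD8)
#   self._DISK_CONVERSIONS[mode](self, feedback_fn)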
11229 class LUInstanceChangeGroup(LogicalUnit):
11230 HPATH = "instance-change-group"
11231 HTYPE = constants.HTYPE_INSTANCE
11232 REQ_BGL = False
11234 def ExpandNames(self):
11235 self.share_locks = _ShareAll()
11236 self.needed_locks = {
11237 locking.LEVEL_NODEGROUP: [],
11238 locking.LEVEL_NODE: [],
11239 }
11241 self._ExpandAndLockInstance()
11243 if self.op.target_groups:
11244 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11245 self.op.target_groups)
11246 else:
11247 self.req_target_uuids = None
11249 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11251 def DeclareLocks(self, level):
11252 if level == locking.LEVEL_NODEGROUP:
11253 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11255 if self.req_target_uuids:
11256 lock_groups = set(self.req_target_uuids)
11258 # Lock all groups used by instance optimistically; this requires going
11259 # via the node before it's locked, requiring verification later on
11260 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11261 lock_groups.update(instance_groups)
11262 else:
11263 # No target groups, need to lock all of them
11264 lock_groups = locking.ALL_SET
11266 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11268 elif level == locking.LEVEL_NODE:
11269 if self.req_target_uuids:
11270 # Lock all nodes used by instances
11271 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11272 self._LockInstancesNodes()
11274 # Lock all nodes in all potential target groups
11275 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11276 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11277 member_nodes = [node_name
11278 for group in lock_groups
11279 for node_name in self.cfg.GetNodeGroup(group).members]
11280 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11281 else:
11282 # Lock all nodes as all groups are potential targets
11283 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11285 def CheckPrereq(self):
11286 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11287 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11288 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11290 assert (self.req_target_uuids is None or
11291 owned_groups.issuperset(self.req_target_uuids))
11292 assert owned_instances == set([self.op.instance_name])
11294 # Get instance information
11295 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11297 # Check if node groups for locked instance are still correct
11298 assert owned_nodes.issuperset(self.instance.all_nodes), \
11299 ("Instance %s's nodes changed while we kept the lock" %
11300 self.op.instance_name)
11302 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11303 owned_groups)
11305 if self.req_target_uuids:
11306 # User requested specific target groups
11307 self.target_uuids = self.req_target_uuids
11308 else:
11309 # All groups except those used by the instance are potential targets
11310 self.target_uuids = owned_groups - inst_groups
11312 conflicting_groups = self.target_uuids & inst_groups
11313 if conflicting_groups:
11314 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11315 " used by the instance '%s'" %
11316 (utils.CommaJoin(conflicting_groups),
11317 self.op.instance_name),
11318 errors.ECODE_INVAL)
11320 if not self.target_uuids:
11321 raise errors.OpPrereqError("There are no possible target groups",
11322 errors.ECODE_INVAL)
11324 def BuildHooksEnv(self):
11325 """Build hooks env.
11328 assert self.target_uuids
11331 "TARGET_GROUPS": " ".join(self.target_uuids),
11334 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11338 def BuildHooksNodes(self):
11339 """Build hooks nodes.
11342 mn = self.cfg.GetMasterNode()
11343 return ([mn], [mn])
11345 def Exec(self, feedback_fn):
11346 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11348 assert instances == [self.op.instance_name], "Instance not locked"
11350 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11351 instances=instances, target_groups=list(self.target_uuids))
11353 ial.Run(self.op.iallocator)
11355 if not ial.success:
11356 raise errors.OpPrereqError("Can't compute solution for changing group of"
11357 " instance '%s' using iallocator '%s': %s" %
11358 (self.op.instance_name, self.op.iallocator,
11359 ial.info),
11360 errors.ECODE_NORES)
11362 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11364 self.LogInfo("Iallocator returned %s job(s) for changing group of"
11365 " instance '%s'", len(jobs), self.op.instance_name)
11367 return ResultWithJobs(jobs)
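# Sketch of the value returned above (structure only; the concrete opcode
# types depend on what _LoadNodeEvacResult built from the iallocator result):
#
#   ResultWithJobs([[opcodes.OpInstanceMigrate(...)],
#                   [opcodes.OpInstanceReplaceDisks(...)]])
#
# mcpu.Processor._ProcessResult then submits each inner list as one job.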
11370 class LUBackupQuery(NoHooksLU):
11371 """Query the exports list
11376 def ExpandNames(self):
11377 self.needed_locks = {}
11378 self.share_locks[locking.LEVEL_NODE] = 1
11379 if not self.op.nodes:
11380 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11381 else:
11382 self.needed_locks[locking.LEVEL_NODE] = \
11383 _GetWantedNodes(self, self.op.nodes)
11385 def Exec(self, feedback_fn):
11386 """Compute the list of all the exported system images.
11389 @return: a dictionary with the structure node->(export-list)
11390 where export-list is a list of the instances exported on
11394 self.nodes = self.owned_locks(locking.LEVEL_NODE)
11395 rpcresult = self.rpc.call_export_list(self.nodes)
11397 for node in rpcresult:
11398 if rpcresult[node].fail_msg:
11399 result[node] = False
11400 else:
11401 result[node] = rpcresult[node].payload
11403 return result
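# Example of the returned shape (hypothetical names); nodes whose RPC failed
# map to False instead of an export list:
#
#   {"node1.example.com": ["inst1.example.com"],
#    "node2.example.com": False}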
11406 class LUBackupPrepare(NoHooksLU):
11407 """Prepares an instance for an export and returns useful information.
11412 def ExpandNames(self):
11413 self._ExpandAndLockInstance()
11415 def CheckPrereq(self):
11416 """Check prerequisites.
11419 instance_name = self.op.instance_name
11421 self.instance = self.cfg.GetInstanceInfo(instance_name)
11422 assert self.instance is not None, \
11423 "Cannot retrieve locked instance %s" % self.op.instance_name
11424 _CheckNodeOnline(self, self.instance.primary_node)
11426 self._cds = _GetClusterDomainSecret()
11428 def Exec(self, feedback_fn):
11429 """Prepares an instance for an export.
11432 instance = self.instance
11434 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11435 salt = utils.GenerateSecret(8)
11437 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11438 result = self.rpc.call_x509_cert_create(instance.primary_node,
11439 constants.RIE_CERT_VALIDITY)
11440 result.Raise("Can't create X509 key and certificate on %s" % result.node)
11442 (name, cert_pem) = result.payload
11444 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11445 cert_pem)
11447 return {
11448 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11449 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11450 salt),
11451 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11452 }
11454 return None
11457 class LUBackupExport(LogicalUnit):
11458 """Export an instance to an image in the cluster.
11461 HPATH = "instance-export"
11462 HTYPE = constants.HTYPE_INSTANCE
11465 def CheckArguments(self):
11466 """Check the arguments.
11469 self.x509_key_name = self.op.x509_key_name
11470 self.dest_x509_ca_pem = self.op.destination_x509_ca
11472 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11473 if not self.x509_key_name:
11474 raise errors.OpPrereqError("Missing X509 key name for encryption",
11475 errors.ECODE_INVAL)
11477 if not self.dest_x509_ca_pem:
11478 raise errors.OpPrereqError("Missing destination X509 CA",
11479 errors.ECODE_INVAL)
11481 def ExpandNames(self):
11482 self._ExpandAndLockInstance()
11484 # Lock all nodes for local exports
11485 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11486 # FIXME: lock only instance primary and destination node
11488 # Sad but true, for now we have to lock all nodes, as we don't know where
11489 # the previous export might be, and in this LU we search for it and
11490 # remove it from its current node. In the future we could fix this by:
11491 # - making a tasklet to search (share-lock all), then create the
11492 # new one, then one to remove, after
11493 # - removing the removal operation altogether
11494 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11496 def DeclareLocks(self, level):
11497 """Last minute lock declaration."""
11498 # All nodes are locked anyway, so nothing to do here.
11500 def BuildHooksEnv(self):
11501 """Build hooks env.
11503 This will run on the master, primary node and target node.
11507 "EXPORT_MODE": self.op.mode,
11508 "EXPORT_NODE": self.op.target_node,
11509 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11510 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11511 # TODO: Generic function for boolean env variables
11512 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11515 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11519 def BuildHooksNodes(self):
11520 """Build hooks nodes.
11523 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11525 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11526 nl.append(self.op.target_node)
11528 return (nl, nl)
11530 def CheckPrereq(self):
11531 """Check prerequisites.
11533 This checks that the instance and node names are valid.
11535 """
11536 instance_name = self.op.instance_name
11538 self.instance = self.cfg.GetInstanceInfo(instance_name)
11539 assert self.instance is not None, \
11540 "Cannot retrieve locked instance %s" % self.op.instance_name
11541 _CheckNodeOnline(self, self.instance.primary_node)
11543 if (self.op.remove_instance and self.instance.admin_up and
11544 not self.op.shutdown):
11545 raise errors.OpPrereqError("Can not remove instance without shutting it"
11548 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11549 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11550 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11551 assert self.dst_node is not None
11553 _CheckNodeOnline(self, self.dst_node.name)
11554 _CheckNodeNotDrained(self, self.dst_node.name)
11556 self._cds = None
11557 self.dest_disk_info = None
11558 self.dest_x509_ca = None
11560 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11561 self.dst_node = None
11563 if len(self.op.target_node) != len(self.instance.disks):
11564 raise errors.OpPrereqError(("Received destination information for %s"
11565 " disks, but instance %s has %s disks") %
11566 (len(self.op.target_node), instance_name,
11567 len(self.instance.disks)),
11568 errors.ECODE_INVAL)
11570 cds = _GetClusterDomainSecret()
11572 # Check X509 key name
11573 try:
11574 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11575 except (TypeError, ValueError), err:
11576 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11578 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11579 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11580 errors.ECODE_INVAL)
11582 # Load and verify CA
11583 try:
11584 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11585 except OpenSSL.crypto.Error, err:
11586 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11587 (err, ), errors.ECODE_INVAL)
11589 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11590 if errcode is not None:
11591 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11592 (msg, ), errors.ECODE_INVAL)
11594 self.dest_x509_ca = cert
11596 # Verify target information
11597 disk_info = []
11598 for idx, disk_data in enumerate(self.op.target_node):
11599 try:
11600 (host, port, magic) = \
11601 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11602 except errors.GenericError, err:
11603 raise errors.OpPrereqError("Target info for disk %s: %s" %
11604 (idx, err), errors.ECODE_INVAL)
11606 disk_info.append((host, port, magic))
11608 assert len(disk_info) == len(self.op.target_node)
11609 self.dest_disk_info = disk_info
11611 else:
11612 raise errors.ProgrammerError("Unhandled export mode %r" %
11613 self.op.mode)
11615 # instance disk type verification
11616 # TODO: Implement export support for file-based disks
11617 for disk in self.instance.disks:
11618 if disk.dev_type == constants.LD_FILE:
11619 raise errors.OpPrereqError("Export not supported for instances with"
11620 " file-based disks", errors.ECODE_INVAL)
11622 def _CleanupExports(self, feedback_fn):
11623 """Removes exports of current instance from all other nodes.
11625 If an instance in a cluster with nodes A..D was exported to node C, its
11626 exports will be removed from the nodes A, B and D.
11628 """
11629 assert self.op.mode != constants.EXPORT_MODE_REMOTE
11631 nodelist = self.cfg.GetNodeList()
11632 nodelist.remove(self.dst_node.name)
11634 # on one-node clusters nodelist will be empty after the removal
11635 # if we proceed the backup would be removed because OpBackupQuery
11636 # substitutes an empty list with the full cluster node list.
11637 iname = self.instance.name
11638 if nodelist:
11639 feedback_fn("Removing old exports for instance %s" % iname)
11640 exportlist = self.rpc.call_export_list(nodelist)
11641 for node in exportlist:
11642 if exportlist[node].fail_msg:
11643 continue
11644 if iname in exportlist[node].payload:
11645 msg = self.rpc.call_export_remove(node, iname).fail_msg
11646 if msg:
11647 self.LogWarning("Could not remove older export for instance %s"
11648 " on node %s: %s", iname, node, msg)
11650 def Exec(self, feedback_fn):
11651 """Export an instance to an image in the cluster.
11654 assert self.op.mode in constants.EXPORT_MODES
11656 instance = self.instance
11657 src_node = instance.primary_node
11659 if self.op.shutdown:
11660 # shutdown the instance, but not the disks
11661 feedback_fn("Shutting down instance %s" % instance.name)
11662 result = self.rpc.call_instance_shutdown(src_node, instance,
11663 self.op.shutdown_timeout)
11664 # TODO: Maybe ignore failures if ignore_remove_failures is set
11665 result.Raise("Could not shutdown instance %s on"
11666 " node %s" % (instance.name, src_node))
11668 # set the disks ID correctly since call_instance_start needs the
11669 # correct drbd minor to create the symlinks
11670 for disk in instance.disks:
11671 self.cfg.SetDiskID(disk, src_node)
11673 activate_disks = (not instance.admin_up)
11675 if activate_disks:
11676 # Activate the instance disks if we're exporting a stopped instance
11677 feedback_fn("Activating disks for %s" % instance.name)
11678 _StartInstanceDisks(self, instance, None)
11680 try:
11681 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11682 instance)
11684 helper.CreateSnapshots()
11685 try:
11686 if (self.op.shutdown and instance.admin_up and
11687 not self.op.remove_instance):
11688 assert not activate_disks
11689 feedback_fn("Starting instance %s" % instance.name)
11690 result = self.rpc.call_instance_start(src_node, instance,
11691 None, None)
11692 msg = result.fail_msg
11693 if msg:
11694 feedback_fn("Failed to start instance: %s" % msg)
11695 _ShutdownInstanceDisks(self, instance)
11696 raise errors.OpExecError("Could not start instance: %s" % msg)
11698 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11699 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11700 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11701 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11702 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11704 (key_name, _, _) = self.x509_key_name
11706 dest_ca_pem = \
11707 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11708 self.dest_x509_ca)
11710 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11711 key_name, dest_ca_pem,
11712 timeouts)
11713 finally:
11714 helper.Cleanup()
11716 # Check for backwards compatibility
11717 assert len(dresults) == len(instance.disks)
11718 assert compat.all(isinstance(i, bool) for i in dresults), \
11719 "Not all results are boolean: %r" % dresults
11723 feedback_fn("Deactivating disks for %s" % instance.name)
11724 _ShutdownInstanceDisks(self, instance)
11726 if not (compat.all(dresults) and fin_resu):
11727 failures = []
11728 if not fin_resu:
11729 failures.append("export finalization")
11730 if not compat.all(dresults):
11731 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11732 if not dsk)
11733 failures.append("disk export: disk(s) %s" % fdsk)
11735 raise errors.OpExecError("Export failed, errors in %s" %
11736 utils.CommaJoin(failures))
11738 # At this point, the export was successful, we can cleanup/finish
11740 # Remove instance if requested
11741 if self.op.remove_instance:
11742 feedback_fn("Removing instance %s" % instance.name)
11743 _RemoveInstance(self, feedback_fn, instance,
11744 self.op.ignore_remove_failures)
11746 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11747 self._CleanupExports(feedback_fn)
11749 return fin_resu, dresults
11752 class LUBackupRemove(NoHooksLU):
11753 """Remove exports related to the named instance.
11758 def ExpandNames(self):
11759 self.needed_locks = {}
11760 # We need all nodes to be locked in order for RemoveExport to work, but we
11761 # don't need to lock the instance itself, as nothing will happen to it (and
11762 # we can remove exports also for a removed instance)
11763 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11765 def Exec(self, feedback_fn):
11766 """Remove any export.
11769 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11770 # If the instance was not found we'll try with the name that was passed in.
11771 # This will only work if it was an FQDN, though.
11772 fqdn_warn = False
11773 if not instance_name:
11774 fqdn_warn = True
11775 instance_name = self.op.instance_name
11777 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11778 exportlist = self.rpc.call_export_list(locked_nodes)
11779 found = False
11780 for node in exportlist:
11781 msg = exportlist[node].fail_msg
11782 if msg:
11783 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11784 continue
11785 if instance_name in exportlist[node].payload:
11786 found = True
11787 result = self.rpc.call_export_remove(node, instance_name)
11788 msg = result.fail_msg
11789 if msg:
11790 logging.error("Could not remove export for instance %s"
11791 " on node %s: %s", instance_name, node, msg)
11793 if fqdn_warn and not found:
11794 feedback_fn("Export not found. If trying to remove an export belonging"
11795 " to a deleted instance please use its Fully Qualified"
11799 class LUGroupAdd(LogicalUnit):
11800 """Logical unit for creating node groups.
11803 HPATH = "group-add"
11804 HTYPE = constants.HTYPE_GROUP
11807 def ExpandNames(self):
11808 # We need the new group's UUID here so that we can create and acquire the
11809 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11810 # that it should not check whether the UUID exists in the configuration.
11811 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11812 self.needed_locks = {}
11813 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11815 def CheckPrereq(self):
11816 """Check prerequisites.
11818 This checks that the given group name is not an existing node group
11819 already.
11821 """
11822 try:
11823 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11824 except errors.OpPrereqError:
11825 pass
11826 else:
11827 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11828 " node group (UUID: %s)" %
11829 (self.op.group_name, existing_uuid),
11830 errors.ECODE_EXISTS)
11832 if self.op.ndparams:
11833 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11835 def BuildHooksEnv(self):
11836 """Build hooks env.
11840 "GROUP_NAME": self.op.group_name,
11843 def BuildHooksNodes(self):
11844 """Build hooks nodes.
11847 mn = self.cfg.GetMasterNode()
11848 return ([mn], [mn])
11850 def Exec(self, feedback_fn):
11851 """Add the node group to the cluster.
11854 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11855 uuid=self.group_uuid,
11856 alloc_policy=self.op.alloc_policy,
11857 ndparams=self.op.ndparams)
11859 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11860 del self.remove_locks[locking.LEVEL_NODEGROUP]
11863 class LUGroupAssignNodes(NoHooksLU):
11864 """Logical unit for assigning nodes to groups.
11869 def ExpandNames(self):
11870 # These raise errors.OpPrereqError on their own:
11871 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11872 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11874 # We want to lock all the affected nodes and groups. We have readily
11875 # available the list of nodes, and the *destination* group. To gather the
11876 # list of "source" groups, we need to fetch node information later on.
11877 self.needed_locks = {
11878 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11879 locking.LEVEL_NODE: self.op.nodes,
11880 }
11882 def DeclareLocks(self, level):
11883 if level == locking.LEVEL_NODEGROUP:
11884 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11886 # Try to get all affected nodes' groups without having the group or node
11887 # lock yet. Needs verification later in the code flow.
11888 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11890 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11892 def CheckPrereq(self):
11893 """Check prerequisites.
11896 assert self.needed_locks[locking.LEVEL_NODEGROUP]
11897 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
11898 frozenset(self.op.nodes))
11900 expected_locks = (set([self.group_uuid]) |
11901 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11902 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
11903 if actual_locks != expected_locks:
11904 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11905 " current groups are '%s', used to be '%s'" %
11906 (utils.CommaJoin(expected_locks),
11907 utils.CommaJoin(actual_locks)))
11909 self.node_data = self.cfg.GetAllNodesInfo()
11910 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11911 instance_data = self.cfg.GetAllInstancesInfo()
11913 if self.group is None:
11914 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11915 (self.op.group_name, self.group_uuid))
11917 (new_splits, previous_splits) = \
11918 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11919 for node in self.op.nodes],
11920 self.node_data, instance_data)
11922 if new_splits:
11923 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11925 if not self.op.force:
11926 raise errors.OpExecError("The following instances get split by this"
11927 " change and --force was not given: %s" %
11928 fmt_new_splits)
11929 else:
11930 self.LogWarning("This operation will split the following instances: %s",
11931 fmt_new_splits)
11933 if previous_splits:
11934 self.LogWarning("In addition, these already-split instances continue"
11935 " to be split across groups: %s",
11936 utils.CommaJoin(utils.NiceSort(previous_splits)))
11938 def Exec(self, feedback_fn):
11939 """Assign nodes to a new group.
11942 for node in self.op.nodes:
11943 self.node_data[node].group = self.group_uuid
11945 # FIXME: Depends on side-effects of modifying the result of
11946 # C{cfg.GetAllNodesInfo}
11948 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11950 @staticmethod
11951 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11952 """Check for split instances after a node assignment.
11954 This method considers a series of node assignments as an atomic operation,
11955 and returns information about split instances after applying the set of
11958 In particular, it returns information about newly split instances, and
11959 instances that were already split, and remain so after the change.
11961 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11962 considered.
11964 @type changes: list of (node_name, new_group_uuid) pairs.
11965 @param changes: list of node assignments to consider.
11966 @param node_data: a dict with data for all nodes
11967 @param instance_data: a dict with all instances to consider
11968 @rtype: a two-tuple
11969 @return: a list of instances that were previously okay and result split as a
11970 consequence of this change, and a list of instances that were previously
11971 split and this change does not fix.
11973 """
11974 changed_nodes = dict((node, group) for node, group in changes
11975 if node_data[node].group != group)
11977 all_split_instances = set()
11978 previously_split_instances = set()
11980 def InstanceNodes(instance):
11981 return [instance.primary_node] + list(instance.secondary_nodes)
11983 for inst in instance_data.values():
11984 if inst.disk_template not in constants.DTS_INT_MIRROR:
11985 continue
11987 instance_nodes = InstanceNodes(inst)
11989 if len(set(node_data[node].group for node in instance_nodes)) > 1:
11990 previously_split_instances.add(inst.name)
11992 if len(set(changed_nodes.get(node, node_data[node].group)
11993 for node in instance_nodes)) > 1:
11994 all_split_instances.add(inst.name)
11996 return (list(all_split_instances - previously_split_instances),
11997 list(previously_split_instances & all_split_instances))
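# Worked example (hypothetical nodes and groups): a DRBD instance lives on
# nodes A and B, both currently in group "g1". With
# changes = [("B", "g2")] the call returns (["inst"], []) -- newly split.
# Had B already been in "g2" before the call, the result would instead be
# ([], ["inst"]) -- split before, and still split after the change.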
12000 class _GroupQuery(_QueryBase):
12001 FIELDS = query.GROUP_FIELDS
12003 def ExpandNames(self, lu):
12004 lu.needed_locks = {}
12006 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12007 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12009 if not self.names:
12010 self.wanted = [name_to_uuid[name]
12011 for name in utils.NiceSort(name_to_uuid.keys())]
12012 else:
12013 # Accept names to be either names or UUIDs.
12014 missing = []
12015 self.wanted = []
12016 all_uuid = frozenset(self._all_groups.keys())
12018 for name in self.names:
12019 if name in all_uuid:
12020 self.wanted.append(name)
12021 elif name in name_to_uuid:
12022 self.wanted.append(name_to_uuid[name])
12023 else:
12024 missing.append(name)
12026 if missing:
12027 raise errors.OpPrereqError("Some groups do not exist: %s" %
12028 utils.CommaJoin(missing),
12029 errors.ECODE_NOENT)
12031 def DeclareLocks(self, lu, level):
12032 pass
12034 def _GetQueryData(self, lu):
12035 """Computes the list of node groups and their attributes.
12038 do_nodes = query.GQ_NODE in self.requested_data
12039 do_instances = query.GQ_INST in self.requested_data
12041 group_to_nodes = None
12042 group_to_instances = None
12044 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12045 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12046 # latter GetAllInstancesInfo() is not enough, for we have to go through
12047 # instance->node. Hence, we will need to process nodes even if we only need
12048 # instance information.
12049 if do_nodes or do_instances:
12050 all_nodes = lu.cfg.GetAllNodesInfo()
12051 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12052 node_to_group = {}
12054 for node in all_nodes.values():
12055 if node.group in group_to_nodes:
12056 group_to_nodes[node.group].append(node.name)
12057 node_to_group[node.name] = node.group
12059 if do_instances:
12060 all_instances = lu.cfg.GetAllInstancesInfo()
12061 group_to_instances = dict((uuid, []) for uuid in self.wanted)
12063 for instance in all_instances.values():
12064 node = instance.primary_node
12065 if node in node_to_group:
12066 group_to_instances[node_to_group[node]].append(instance.name)
12068 if not do_nodes:
12069 # Do not pass on node information if it was not requested.
12070 group_to_nodes = None
12072 return query.GroupQueryData([self._all_groups[uuid]
12073 for uuid in self.wanted],
12074 group_to_nodes, group_to_instances)
12077 class LUGroupQuery(NoHooksLU):
12078 """Logical unit for querying node groups.
12083 def CheckArguments(self):
12084 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12085 self.op.output_fields, False)
12087 def ExpandNames(self):
12088 self.gq.ExpandNames(self)
12090 def DeclareLocks(self, level):
12091 self.gq.DeclareLocks(self, level)
12093 def Exec(self, feedback_fn):
12094 return self.gq.OldStyleQuery(self)
12097 class LUGroupSetParams(LogicalUnit):
12098 """Modifies the parameters of a node group.
12101 HPATH = "group-modify"
12102 HTYPE = constants.HTYPE_GROUP
12105 def CheckArguments(self):
12106 all_changes = [
12107 self.op.ndparams,
12108 self.op.alloc_policy,
12109 ]
12111 if all_changes.count(None) == len(all_changes):
12112 raise errors.OpPrereqError("Please pass at least one modification",
12113 errors.ECODE_INVAL)
12115 def ExpandNames(self):
12116 # This raises errors.OpPrereqError on its own:
12117 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12119 self.needed_locks = {
12120 locking.LEVEL_NODEGROUP: [self.group_uuid],
12121 }
12123 def CheckPrereq(self):
12124 """Check prerequisites.
12127 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12129 if self.group is None:
12130 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12131 (self.op.group_name, self.group_uuid))
12133 if self.op.ndparams:
12134 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12135 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12136 self.new_ndparams = new_ndparams
12138 def BuildHooksEnv(self):
12139 """Build hooks env.
12143 "GROUP_NAME": self.op.group_name,
12144 "NEW_ALLOC_POLICY": self.op.alloc_policy,
12147 def BuildHooksNodes(self):
12148 """Build hooks nodes.
12151 mn = self.cfg.GetMasterNode()
12152 return ([mn], [mn])
12154 def Exec(self, feedback_fn):
12155 """Modifies the node group.
12160 if self.op.ndparams:
12161 self.group.ndparams = self.new_ndparams
12162 result.append(("ndparams", str(self.group.ndparams)))
12164 if self.op.alloc_policy:
12165 self.group.alloc_policy = self.op.alloc_policy
12167 self.cfg.Update(self.group, feedback_fn)
12168 return result
12171 class LUGroupRemove(LogicalUnit):
12172 HPATH = "group-remove"
12173 HTYPE = constants.HTYPE_GROUP
12174 REQ_BGL = False
12176 def ExpandNames(self):
12177 # This raises errors.OpPrereqError on its own:
12178 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12179 self.needed_locks = {
12180 locking.LEVEL_NODEGROUP: [self.group_uuid],
12181 }
12183 def CheckPrereq(self):
12184 """Check prerequisites.
12186 This checks that the given group name exists as a node group, that it is
12187 empty (i.e., contains no nodes), and that it is not the last group of the
12188 cluster.
12190 """
12191 # Verify that the group is empty.
12192 group_nodes = [node.name
12193 for node in self.cfg.GetAllNodesInfo().values()
12194 if node.group == self.group_uuid]
12196 if group_nodes:
12197 raise errors.OpPrereqError("Group '%s' not empty, has the following"
12198 " nodes: %s" %
12199 (self.op.group_name,
12200 utils.CommaJoin(utils.NiceSort(group_nodes))),
12201 errors.ECODE_STATE)
12203 # Verify the cluster would not be left group-less.
12204 if len(self.cfg.GetNodeGroupList()) == 1:
12205 raise errors.OpPrereqError("Group '%s' is the only group,"
12206 " cannot be removed" %
12207 self.op.group_name,
12208 errors.ECODE_STATE)
12210 def BuildHooksEnv(self):
12211 """Build hooks env.
12215 "GROUP_NAME": self.op.group_name,
12218 def BuildHooksNodes(self):
12219 """Build hooks nodes.
12222 mn = self.cfg.GetMasterNode()
12223 return ([mn], [mn])
12225 def Exec(self, feedback_fn):
12226 """Remove the node group.
12230 self.cfg.RemoveNodeGroup(self.group_uuid)
12231 except errors.ConfigurationError:
12232 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12233 (self.op.group_name, self.group_uuid))
12235 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12238 class LUGroupRename(LogicalUnit):
12239 HPATH = "group-rename"
12240 HTYPE = constants.HTYPE_GROUP
12241 REQ_BGL = False
12243 def ExpandNames(self):
12244 # This raises errors.OpPrereqError on its own:
12245 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12247 self.needed_locks = {
12248 locking.LEVEL_NODEGROUP: [self.group_uuid],
12249 }
12251 def CheckPrereq(self):
12252 """Check prerequisites.
12254 Ensures requested new name is not yet used.
12256 """
12257 try:
12258 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12259 except errors.OpPrereqError:
12260 pass
12261 else:
12262 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12263 " node group (UUID: %s)" %
12264 (self.op.new_name, new_name_uuid),
12265 errors.ECODE_EXISTS)
12267 def BuildHooksEnv(self):
12268 """Build hooks env.
12272 "OLD_NAME": self.op.group_name,
12273 "NEW_NAME": self.op.new_name,
12276 def BuildHooksNodes(self):
12277 """Build hooks nodes.
12280 mn = self.cfg.GetMasterNode()
12282 all_nodes = self.cfg.GetAllNodesInfo()
12283 all_nodes.pop(mn, None)
12285 run_nodes = [mn]
12286 run_nodes.extend(node.name for node in all_nodes.values()
12287 if node.group == self.group_uuid)
12289 return (run_nodes, run_nodes)
12291 def Exec(self, feedback_fn):
12292 """Rename the node group.
12295 group = self.cfg.GetNodeGroup(self.group_uuid)
12298 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12299 (self.op.group_name, self.group_uuid))
12301 group.name = self.op.new_name
12302 self.cfg.Update(group, feedback_fn)
12304 return self.op.new_name
12307 class LUGroupEvacuate(LogicalUnit):
12308 HPATH = "group-evacuate"
12309 HTYPE = constants.HTYPE_GROUP
12310 REQ_BGL = False
12312 def ExpandNames(self):
12313 # This raises errors.OpPrereqError on its own:
12314 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12316 if self.op.target_groups:
12317 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12318 self.op.target_groups)
12319 else:
12320 self.req_target_uuids = []
12322 if self.group_uuid in self.req_target_uuids:
12323 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12324 " as a target group (targets are %s)" %
12326 utils.CommaJoin(self.req_target_uuids)),
12327 errors.ECODE_INVAL)
12329 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12331 self.share_locks = _ShareAll()
12332 self.needed_locks = {
12333 locking.LEVEL_INSTANCE: [],
12334 locking.LEVEL_NODEGROUP: [],
12335 locking.LEVEL_NODE: [],
12336 }
12338 def DeclareLocks(self, level):
12339 if level == locking.LEVEL_INSTANCE:
12340 assert not self.needed_locks[locking.LEVEL_INSTANCE]
12342 # Lock instances optimistically, needs verification once node and group
12343 # locks have been acquired
12344 self.needed_locks[locking.LEVEL_INSTANCE] = \
12345 self.cfg.GetNodeGroupInstances(self.group_uuid)
12347 elif level == locking.LEVEL_NODEGROUP:
12348 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12350 if self.req_target_uuids:
12351 lock_groups = set([self.group_uuid] + self.req_target_uuids)
12353 # Lock all groups used by instances optimistically; this requires going
12354 # via the node before it's locked, requiring verification later on
12355 lock_groups.update(group_uuid
12356 for instance_name in
12357 self.owned_locks(locking.LEVEL_INSTANCE)
12358 for group_uuid in
12359 self.cfg.GetInstanceNodeGroups(instance_name))
12360 else:
12361 # No target groups, need to lock all of them
12362 lock_groups = locking.ALL_SET
12364 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12366 elif level == locking.LEVEL_NODE:
12367 # This will only lock the nodes in the group to be evacuated which
12368 # contain actual instances
12369 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12370 self._LockInstancesNodes()
12372 # Lock all nodes in group to be evacuated and target groups
12373 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12374 assert self.group_uuid in owned_groups
12375 member_nodes = [node_name
12376 for group in owned_groups
12377 for node_name in self.cfg.GetNodeGroup(group).members]
12378 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12380 def CheckPrereq(self):
12381 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12382 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12383 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12385 assert owned_groups.issuperset(self.req_target_uuids)
12386 assert self.group_uuid in owned_groups
12388 # Check if locked instances are still correct
12389 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12391 # Get instance information
12392 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12394 # Check if node groups for locked instances are still correct
12395 for instance_name in owned_instances:
12396 inst = self.instances[instance_name]
12397 assert owned_nodes.issuperset(inst.all_nodes), \
12398 "Instance %s's nodes changed while we kept the lock" % instance_name
12400 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12401 owned_groups)
12403 assert self.group_uuid in inst_groups, \
12404 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12406 if self.req_target_uuids:
12407 # User requested specific target groups
12408 self.target_uuids = self.req_target_uuids
12410 # All groups except the one to be evacuated are potential targets
12411 self.target_uuids = [group_uuid for group_uuid in owned_groups
12412 if group_uuid != self.group_uuid]
12414 if not self.target_uuids:
12415 raise errors.OpPrereqError("There are no possible target groups",
12416 errors.ECODE_INVAL)
12418 def BuildHooksEnv(self):
12419 """Build hooks env.
12423 "GROUP_NAME": self.op.group_name,
12424 "TARGET_GROUPS": " ".join(self.target_uuids),
12427 def BuildHooksNodes(self):
12428 """Build hooks nodes.
12431 mn = self.cfg.GetMasterNode()
12433 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12435 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12437 return (run_nodes, run_nodes)
12439 def Exec(self, feedback_fn):
12440 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12442 assert self.group_uuid not in self.target_uuids
12444 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12445 instances=instances, target_groups=self.target_uuids)
12447 ial.Run(self.op.iallocator)
12449 if not ial.success:
12450 raise errors.OpPrereqError("Can't compute group evacuation using"
12451 " iallocator '%s': %s" %
12452 (self.op.iallocator, ial.info),
12453 errors.ECODE_NORES)
12455 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12457 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12458 len(jobs), self.op.group_name)
12460 return ResultWithJobs(jobs)
12463 class TagsLU(NoHooksLU): # pylint: disable=W0223
12464 """Generic tags LU.
12466 This is an abstract class which is the parent of all the other tags LUs.
12468 """
12469 def ExpandNames(self):
12470 self.group_uuid = None
12471 self.needed_locks = {}
12472 if self.op.kind == constants.TAG_NODE:
12473 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12474 self.needed_locks[locking.LEVEL_NODE] = self.op.name
12475 elif self.op.kind == constants.TAG_INSTANCE:
12476 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12477 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12478 elif self.op.kind == constants.TAG_NODEGROUP:
12479 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12480 self.needed_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12481 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12482 # not possible to acquire the BGL based on opcode parameters)
12484 def CheckPrereq(self):
12485 """Check prerequisites.
12488 if self.op.kind == constants.TAG_CLUSTER:
12489 self.target = self.cfg.GetClusterInfo()
12490 elif self.op.kind == constants.TAG_NODE:
12491 self.target = self.cfg.GetNodeInfo(self.op.name)
12492 elif self.op.kind == constants.TAG_INSTANCE:
12493 self.target = self.cfg.GetInstanceInfo(self.op.name)
12494 elif self.op.kind == constants.TAG_NODEGROUP:
12495 self.target = self.cfg.GetNodeGroup(self.group_uuid)
12496 else:
12497 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12498 str(self.op.kind), errors.ECODE_INVAL)
12501 class LUTagsGet(TagsLU):
12502 """Returns the tags of a given object.
12507 def ExpandNames(self):
12508 TagsLU.ExpandNames(self)
12510 # Share locks as this is only a read operation
12511 self.share_locks = _ShareAll()
12513 def Exec(self, feedback_fn):
12514 """Returns the tag list.
12517 return list(self.target.GetTags())
12520 class LUTagsSearch(NoHooksLU):
12521 """Searches the tags for a given pattern.
12526 def ExpandNames(self):
12527 self.needed_locks = {}
12529 def CheckPrereq(self):
12530 """Check prerequisites.
12532 This checks the pattern passed for validity by compiling it.
12534 """
12535 try:
12536 self.re = re.compile(self.op.pattern)
12537 except re.error, err:
12538 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12539 (self.op.pattern, err), errors.ECODE_INVAL)
12541 def Exec(self, feedback_fn):
12542 """Returns the tag list.
12546 tgts = [("/cluster", cfg.GetClusterInfo())]
12547 ilist = cfg.GetAllInstancesInfo().values()
12548 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12549 nlist = cfg.GetAllNodesInfo().values()
12550 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12551 tgts.extend(("/nodegroup/%s" % n.name, n)
12552 for n in cfg.GetAllNodeGroupsInfo().values())
12553 results = []
12554 for path, target in tgts:
12555 for tag in target.GetTags():
12556 if self.re.search(tag):
12557 results.append((path, tag))
12559 return results
12561 class LUTagsSet(TagsLU):
12562 """Sets a tag on a given object.
12567 def CheckPrereq(self):
12568 """Check prerequisites.
12570 This checks the type and length of the tag name and value.
12572 """
12573 TagsLU.CheckPrereq(self)
12574 for tag in self.op.tags:
12575 objects.TaggableObject.ValidateTag(tag)
12577 def Exec(self, feedback_fn):
12578 """Sets the tag.
12580 """
12581 try:
12582 for tag in self.op.tags:
12583 self.target.AddTag(tag)
12584 except errors.TagError, err:
12585 raise errors.OpExecError("Error while setting tag: %s" % str(err))
12586 self.cfg.Update(self.target, feedback_fn)
12589 class LUTagsDel(TagsLU):
12590 """Delete a list of tags from a given object.
12595 def CheckPrereq(self):
12596 """Check prerequisites.
12598 This checks that we have the given tag.
12600 """
12601 TagsLU.CheckPrereq(self)
12602 for tag in self.op.tags:
12603 objects.TaggableObject.ValidateTag(tag)
12604 del_tags = frozenset(self.op.tags)
12605 cur_tags = self.target.GetTags()
12607 diff_tags = del_tags - cur_tags
12608 if diff_tags:
12609 diff_names = ("'%s'" % i for i in sorted(diff_tags))
12610 raise errors.OpPrereqError("Tag(s) %s not found" %
12611 (utils.CommaJoin(diff_names), ),
12612 errors.ECODE_NOENT)
12614 def Exec(self, feedback_fn):
12615 """Remove the tag from the object.
12618 for tag in self.op.tags:
12619 self.target.RemoveTag(tag)
12620 self.cfg.Update(self.target, feedback_fn)
12623 class LUTestDelay(NoHooksLU):
12624 """Sleep for a specified amount of time.
12626 This LU sleeps on the master and/or nodes for a specified amount of
12627 time.
12629 """
12630 REQ_BGL = False
12632 def ExpandNames(self):
12633 """Expand names and set required locks.
12635 This expands the node list, if any.
12637 """
12638 self.needed_locks = {}
12639 if self.op.on_nodes:
12640 # _GetWantedNodes can be used here, but is not always appropriate to use
12641 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12642 # more information.
12643 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12644 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12646 def _TestDelay(self):
12647 """Do the actual sleep.
12650 if self.op.on_master:
12651 if not utils.TestDelay(self.op.duration):
12652 raise errors.OpExecError("Error during master delay test")
12653 if self.op.on_nodes:
12654 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12655 for node, node_result in result.items():
12656 node_result.Raise("Failure during rpc call to node %s" % node)
12658 def Exec(self, feedback_fn):
12659 """Execute the test delay opcode, with the wanted repetitions.
12662 if self.op.repeat == 0:
12665 top_value = self.op.repeat - 1
12666 for i in range(self.op.repeat):
12667 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12671 class LUTestJqueue(NoHooksLU):
12672 """Utility LU to test some aspects of the job queue.
12677 # Must be lower than default timeout for WaitForJobChange to see whether it
12678 # notices changed jobs
12679 _CLIENT_CONNECT_TIMEOUT = 20.0
12680 _CLIENT_CONFIRM_TIMEOUT = 60.0
12682 @classmethod
12683 def _NotifyUsingSocket(cls, cb, errcls):
12684 """Opens a Unix socket and waits for another program to connect.
12687 @param cb: Callback to send socket name to client
12688 @type errcls: class
12689 @param errcls: Exception class to use for errors
12691 """
12692 # Using a temporary directory as there's no easy way to create temporary
12693 # sockets without writing a custom loop around tempfile.mktemp and
12694 # socket.bind
12695 tmpdir = tempfile.mkdtemp()
12696 try:
12697 tmpsock = utils.PathJoin(tmpdir, "sock")
12699 logging.debug("Creating temporary socket at %s", tmpsock)
12700 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12701 try:
12702 sock.bind(tmpsock)
12703 sock.listen(1)
12705 # Send details to client
12706 cb(tmpsock)
12708 # Wait for client to connect before continuing
12709 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12710 try:
12711 (conn, _) = sock.accept()
12712 except socket.error, err:
12713 raise errcls("Client didn't connect in time (%s)" % err)
12714 finally:
12715 sock.close()
12716 finally:
12717 # Remove as soon as client is connected
12718 shutil.rmtree(tmpdir)
12720 # Wait for client to close
12721 try:
12722 try:
12723 # pylint: disable=E1101
12724 # Instance of '_socketobject' has no ... member
12725 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12726 conn.recv(1)
12727 except socket.error, err:
12728 raise errcls("Client failed to confirm notification (%s)" % err)
12729 finally:
12730 conn.close()
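# A minimal client for this handshake might look like the following sketch
# (illustrative, not part of the test suite):
#
#   sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#   sock.connect(sockname)  # unblocks sock.accept() above
#   sock.close()            # unblocks conn.recv(1) above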
12732 def _SendNotification(self, test, arg, sockname):
12733 """Sends a notification to the client.
12736 @param test: Test name
12737 @param arg: Test argument (depends on test)
12738 @type sockname: string
12739 @param sockname: Socket path
12741 """
12742 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12744 def _Notify(self, prereq, test, arg):
12745 """Notifies the client of a test.
12748 @param prereq: Whether this is a prereq-phase test
12750 @param test: Test name
12751 @param arg: Test argument (depends on test)
12755 errcls = errors.OpPrereqError
12757 errcls = errors.OpExecError
12759 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12763 def CheckArguments(self):
12764 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12765 self.expandnames_calls = 0
12767 def ExpandNames(self):
12768 checkargs_calls = getattr(self, "checkargs_calls", 0)
12769 if checkargs_calls < 1:
12770 raise errors.ProgrammerError("CheckArguments was not called")
12772 self.expandnames_calls += 1
12774 if self.op.notify_waitlock:
12775 self._Notify(True, constants.JQT_EXPANDNAMES, None)
12777 self.LogInfo("Expanding names")
12779 # Get lock on master node (just to get a lock, not for a particular reason)
12780 self.needed_locks = {
12781 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12782 }
12784 def Exec(self, feedback_fn):
12785 if self.expandnames_calls < 1:
12786 raise errors.ProgrammerError("ExpandNames was not called")
12788 if self.op.notify_exec:
12789 self._Notify(False, constants.JQT_EXEC, None)
12791 self.LogInfo("Executing")
12793 if self.op.log_messages:
12794 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12795 for idx, msg in enumerate(self.op.log_messages):
12796 self.LogInfo("Sending log message %s", idx + 1)
12797 feedback_fn(constants.JQT_MSGPREFIX + msg)
12798 # Report how many test messages have been sent
12799 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12801 if self.op.fail:
12802 raise errors.OpExecError("Opcode failure was requested")
12804 return True
12807 class IAllocator(object):
12808 """IAllocator framework.
12810 An IAllocator instance has four sets of attributes:
12811 - cfg that is needed to query the cluster
12812 - input data (all members of the _KEYS class attribute are required)
12813 - four buffer attributes (in|out_data|text), that represent the
12814 input (to the external script) in text and data structure format,
12815 and the output from it, again in two formats
12816 - the result variables from the script (success, info, nodes) for
12817 easy usage
12819 """
12820 # pylint: disable=R0902
12821 # lots of instance attributes
12823 def __init__(self, cfg, rpc, mode, **kwargs):
12824 self.cfg = cfg
12825 self.rpc = rpc
12826 # init buffer variables
12827 self.in_text = self.out_text = self.in_data = self.out_data = None
12828 # init all input fields so that pylint is happy
12829 self.mode = mode
12830 self.memory = self.disks = self.disk_template = None
12831 self.os = self.tags = self.nics = self.vcpus = None
12832 self.hypervisor = None
12833 self.relocate_from = None
12834 self.name = None
12835 self.instances = None
12836 self.evac_mode = None
12837 self.target_groups = []
12838 # computed fields
12839 self.required_nodes = None
12840 # init result fields
12841 self.success = self.info = self.result = None
12843 try:
12844 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12845 except KeyError:
12846 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12847 " IAllocator" % self.mode)
12849 keyset = [n for (n, _) in keydata]
12851 for key in kwargs:
12852 if key not in keyset:
12853 raise errors.ProgrammerError("Invalid input parameter '%s' to"
12854 " IAllocator" % key)
12855 setattr(self, key, kwargs[key])
12857 for key in keyset:
12858 if key not in kwargs:
12859 raise errors.ProgrammerError("Missing input parameter '%s' to"
12860 " IAllocator" % key)
12861 self._BuildInputData(compat.partial(fn, self), keydata)
12863 def _ComputeClusterData(self):
12864 """Compute the generic allocator input data.
12866 This is the data that is independent of the actual operation.
12868 """
12869 cfg = self.cfg
12870 cluster_info = cfg.GetClusterInfo()
12871 # cluster data
12872 data = {
12873 "version": constants.IALLOCATOR_VERSION,
12874 "cluster_name": cfg.GetClusterName(),
12875 "cluster_tags": list(cluster_info.GetTags()),
12876 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12877 # we don't have job IDs
12878 }
12879 ninfo = cfg.GetAllNodesInfo()
12880 iinfo = cfg.GetAllInstancesInfo().values()
12881 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12884 node_list = [n.name for n in ninfo.values() if n.vm_capable]
12886 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12887 hypervisor_name = self.hypervisor
12888 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12889 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12891 hypervisor_name = cluster_info.enabled_hypervisors[0]
12893 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12894 hypervisor_name)
12895 node_iinfo = \
12896 self.rpc.call_all_instances_info(node_list,
12897 cluster_info.enabled_hypervisors)
12899 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12901 config_ndata = self._ComputeBasicNodeData(ninfo)
12902 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12903 i_list, config_ndata)
12904 assert len(data["nodes"]) == len(ninfo), \
12905 "Incomplete node data computed"
12907 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12909 self.in_data = data
12911 @staticmethod
12912 def _ComputeNodeGroupData(cfg):
12913 """Compute node groups data.
12916 ng = dict((guuid, {
12917 "name": gdata.name,
12918 "alloc_policy": gdata.alloc_policy,
12920 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
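# Example of the structure returned above (UUID and values illustrative):
#
#   {"6f6b5cb8-0f0e-4d2a-9f38-8e6a70f2a062":
#      {"name": "default", "alloc_policy": "preferred"}}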
12924 @staticmethod
12925 def _ComputeBasicNodeData(node_cfg):
12926 """Compute global node data.
12929 @returns: a dict of name: (node dict, node config)
12932 # fill in static (config-based) values
12933 node_results = dict((ninfo.name, {
12934 "tags": list(ninfo.GetTags()),
12935 "primary_ip": ninfo.primary_ip,
12936 "secondary_ip": ninfo.secondary_ip,
12937 "offline": ninfo.offline,
12938 "drained": ninfo.drained,
12939 "master_candidate": ninfo.master_candidate,
12940 "group": ninfo.group,
12941 "master_capable": ninfo.master_capable,
12942 "vm_capable": ninfo.vm_capable,
12944 for ninfo in node_cfg.values())
12946 return node_results
12949 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
12951 """Compute global node data.
12953 @param node_results: the basic node structures as filled from the config
12955 """
12956 # make a copy of the current dict
12957 node_results = dict(node_results)
12958 for nname, nresult in node_data.items():
12959 assert nname in node_results, "Missing basic data for node %s" % nname
12960 ninfo = node_cfg[nname]
12962 if not (ninfo.offline or ninfo.drained):
12963 nresult.Raise("Can't get data for node %s" % nname)
12964 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
12965 nname)
12966 remote_info = nresult.payload
12968 for attr in ["memory_total", "memory_free", "memory_dom0",
12969 "vg_size", "vg_free", "cpu_total"]:
12970 if attr not in remote_info:
12971 raise errors.OpExecError("Node '%s' didn't return attribute"
12972 " '%s'" % (nname, attr))
12973 if not isinstance(remote_info[attr], int):
12974 raise errors.OpExecError("Node '%s' returned invalid value"
12976 (nname, attr, remote_info[attr]))
12977 # compute memory used by primary instances
12978 i_p_mem = i_p_up_mem = 0
12979 for iinfo, beinfo in i_list:
12980 if iinfo.primary_node == nname:
12981 i_p_mem += beinfo[constants.BE_MEMORY]
12982 if iinfo.name not in node_iinfo[nname].payload:
12985 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
12986 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
12987 remote_info["memory_free"] -= max(0, i_mem_diff)
12990 i_p_up_mem += beinfo[constants.BE_MEMORY]
12992 # compute memory used by instances
12994 "total_memory": remote_info["memory_total"],
12995 "reserved_memory": remote_info["memory_dom0"],
12996 "free_memory": remote_info["memory_free"],
12997 "total_disk": remote_info["vg_size"],
12998 "free_disk": remote_info["vg_free"],
12999 "total_cpus": remote_info["cpu_total"],
13000 "i_pri_memory": i_p_mem,
13001 "i_pri_up_memory": i_p_up_mem,
13003 pnr_dyn.update(node_results[nname])
13004 node_results[nname] = pnr_dyn
13006 return node_results
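
  # Worked example (hypothetical numbers): for an instance configured with
  # BE_MEMORY = 1024 MB whose hypervisor currently reports only 512 MB in
  # use, i_mem_diff = 1024 - 512 = 512, so 512 MB is subtracted from the
  # node's reported "memory_free". The allocator is thus charged for memory
  # that is committed to instances even when it is not yet consumed.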

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }

    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
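
  # Note (summary, not upstream code): serializer.Dump produces the JSON
  # text that Run() below ships to the master node via the
  # call_iallocator_runner RPC; per the iallocator protocol, the node
  # daemon then writes it to a temporary file and invokes the selected
  # allocator script with that file name as its argument.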

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                  ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                  ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
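
  # Illustrative example (hypothetical data) of a value accepted by
  # _NEVAC_RESULT, a JSON list of (moved, failed, jobs):
  #
  #   [
  #     [["inst1.example.com", "group1", ["node3.example.com"]]],   # moved
  #     [["inst2.example.com", "not enough memory"]],               # failed
  #     [[{"OP_ID": "OP_INSTANCE_MIGRATE",
  #        "instance_name": "inst1.example.com"}]],                 # jobs
  #   ]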

  # Allocator mode -> (request-building fn, request key checks, result check)
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
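
  # Illustrative example (hypothetical data) of allocator output as parsed
  # by _ValidateResult for an "allocate" request:
  #
  #   {"success": true, "info": "allocation successful",
  #    "result": ["node2.example.com", "node3.example.com"]}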

  def _ValidateResult(self):
    """Process the allocator results.

    This will parse the results and, on success, save them in
    self.out_data and the related instance attributes.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
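
  # Hypothetical usage sketch: with node2group = {"n1": "uuid-a"} and
  # groups = {"uuid-a": {"name": "default"}}, calling
  # _NodesToGroups(node2group, groups, ["n1", "nX"]) yields ["default"]:
  # the unknown node "nX" is skipped, and a group UUID missing from the
  # groups dict would fall back to the raw UUID string.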


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
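
# Hypothetical usage sketch: _GetQueryImplementation(constants.QR_NODE)
# returns the _NodeQuery class defined earlier in this module, while an
# unknown resource name raises OpPrereqError with errors.ECODE_INVAL.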