code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay to many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43 import operator
  44
  45 from ganeti import ssh
  46 from ganeti import utils
  47 from ganeti import errors
  48 from ganeti import hypervisor
  49 from ganeti import locking
  50 from ganeti import constants
  51 from ganeti import objects
  52 from ganeti import serializer
  53 from ganeti import ssconf
  54 from ganeti import uidpool
  55 from ganeti import compat
  56 from ganeti import masterd
  57 from ganeti import netutils
  58 from ganeti import query
  59 from ganeti import qlang
  60 from ganeti import opcodes
  61 from ganeti import ht
  62
  63 import ganeti.masterd.instance # pylint: disable=W0611
  64
  65
  66 class ResultWithJobs:
  67   """Data container for LU results with jobs.
  68
  69   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  70   by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  71   contained in the C{jobs} attribute and include the job IDs in the opcode
  72   result.
  73
  74   """
  75   def __init__(self, jobs, **kwargs):
  76     """Initializes this class.
  77
  78     Additional return values can be specified as keyword arguments.
  79
  80     @type jobs: list of lists of L{opcode.OpCode}
  81     @param jobs: A list of lists of opcode objects
  82
  83     """
  84     self.jobs = jobs
  85     self.other = kwargs
  86
  87
  88 class LogicalUnit(object):
  89   """Logical Unit base class.
  90
  91   Subclasses must follow these rules:
  92     - implement ExpandNames
  93     - implement CheckPrereq (except when tasklets are used)
  94     - implement Exec (except when tasklets are used)
  95     - implement BuildHooksEnv
  96     - implement BuildHooksNodes
  97     - redefine HPATH and HTYPE
  98     - optionally redefine their run requirements:
  99         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 100
 101   Note that all commands require root permissions.
 102
 103   @ivar dry_run_result: the value (if any) that will be returned to the caller
 104       in dry-run mode (signalled by opcode dry_run parameter)
 105
 106   """
 107   HPATH = None
 108   HTYPE = None
 109   REQ_BGL = True
 110
 111   def __init__(self, processor, op, context, rpc):
 112     """Constructor for LogicalUnit.
 113
 114     This needs to be overridden in derived classes in order to check op
 115     validity.
 116
 117     """
 118     self.proc = processor
 119     self.op = op
 120     self.cfg = context.cfg
 121     self.glm = context.glm
 122     # readability alias
 123     self.owned_locks = context.glm.list_owned
 124     self.context = context
 125     self.rpc = rpc
 126     # Dicts used to declare locking needs to mcpu
 127     self.needed_locks = None
 128     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 129     self.add_locks = {}
 130     self.remove_locks = {}
 131     # Used to force good behavior when calling helper functions
 132     self.recalculate_locks = {}
 133     # logging
 134     self.Log = processor.Log # pylint: disable=C0103
 135     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 136     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 137     self.LogStep = processor.LogStep # pylint: disable=C0103
 138     # support for dry-run
 139     self.dry_run_result = None
 140     # support for generic debug attribute
 141     if (not hasattr(self.op, "debug_level") or
 142         not isinstance(self.op.debug_level, int)):
 143       self.op.debug_level = 0
 144
 145     # Tasklets
 146     self.tasklets = None
 147
 148     # Validate opcode parameters and set defaults
 149     self.op.Validate(True)
 150
 151     self.CheckArguments()
 152
 153   def CheckArguments(self):
 154     """Check syntactic validity for the opcode arguments.
 155
 156     This method is for doing a simple syntactic check and ensure
 157     validity of opcode parameters, without any cluster-related
 158     checks. While the same can be accomplished in ExpandNames and/or
 159     CheckPrereq, doing these separate is better because:
 160
 161       - ExpandNames is left as as purely a lock-related function
 162       - CheckPrereq is run after we have acquired locks (and possible
 163         waited for them)
 164
 165     The function is allowed to change the self.op attribute so that
 166     later methods can no longer worry about missing parameters.
 167
 168     """
 169     pass
 170
 171   def ExpandNames(self):
 172     """Expand names for this LU.
 173
 174     This method is called before starting to execute the opcode, and it should
 175     update all the parameters of the opcode to their canonical form (e.g. a
 176     short node name must be fully expanded after this method has successfully
 177     completed). This way locking, hooks, logging, etc. can work correctly.
 178
 179     LUs which implement this method must also populate the self.needed_locks
 180     member, as a dict with lock levels as keys, and a list of needed lock names
 181     as values. Rules:
 182
 183       - use an empty dict if you don't need any lock
 184       - if you don't need any lock at a particular level omit that level
 185       - don't put anything for the BGL level
 186       - if you want all locks at a level use locking.ALL_SET as a value
 187
 188     If you need to share locks (rather than acquire them exclusively) at one
 189     level you can modify self.share_locks, setting a true value (usually 1) for
 190     that level. By default locks are not shared.
 191
 192     This function can also define a list of tasklets, which then will be
 193     executed in order instead of the usual LU-level CheckPrereq and Exec
 194     functions, if those are not defined by the LU.
 195
 196     Examples::
 197
 198       # Acquire all nodes and one instance
 199       self.needed_locks = {
 200         locking.LEVEL_NODE: locking.ALL_SET,
 201         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 202       }
 203       # Acquire just two nodes
 204       self.needed_locks = {
 205         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 206       }
 207       # Acquire no locks
 208       self.needed_locks = {} # No, you can't leave it to the default value None
 209
 210     """
 211     # The implementation of this method is mandatory only if the new LU is
 212     # concurrent, so that old LUs don't need to be changed all at the same
 213     # time.
 214     if self.REQ_BGL:
 215       self.needed_locks = {} # Exclusive LUs don't need locks.
 216     else:
 217       raise NotImplementedError
 218
 219   def DeclareLocks(self, level):
 220     """Declare LU locking needs for a level
 221
 222     While most LUs can just declare their locking needs at ExpandNames time,
 223     sometimes there's the need to calculate some locks after having acquired
 224     the ones before. This function is called just before acquiring locks at a
 225     particular level, but after acquiring the ones at lower levels, and permits
 226     such calculations. It can be used to modify self.needed_locks, and by
 227     default it does nothing.
 228
 229     This function is only called if you have something already set in
 230     self.needed_locks for the level.
 231
 232     @param level: Locking level which is going to be locked
 233     @type level: member of ganeti.locking.LEVELS
 234
 235     """
 236
 237   def CheckPrereq(self):
 238     """Check prerequisites for this LU.
 239
 240     This method should check that the prerequisites for the execution
 241     of this LU are fulfilled. It can do internode communication, but
 242     it should be idempotent - no cluster or system changes are
 243     allowed.
 244
 245     The method should raise errors.OpPrereqError in case something is
 246     not fulfilled. Its return value is ignored.
 247
 248     This method should also update all the parameters of the opcode to
 249     their canonical form if it hasn't been done by ExpandNames before.
 250
 251     """
 252     if self.tasklets is not None:
 253       for (idx, tl) in enumerate(self.tasklets):
 254         logging.debug("Checking prerequisites for tasklet %s/%s",
 255                       idx + 1, len(self.tasklets))
 256         tl.CheckPrereq()
 257     else:
 258       pass
 259
 260   def Exec(self, feedback_fn):
 261     """Execute the LU.
 262
 263     This method should implement the actual work. It should raise
 264     errors.OpExecError for failures that are somewhat dealt with in
 265     code, or expected.
 266
 267     """
 268     if self.tasklets is not None:
 269       for (idx, tl) in enumerate(self.tasklets):
 270         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 271         tl.Exec(feedback_fn)
 272     else:
 273       raise NotImplementedError
 274
 275   def BuildHooksEnv(self):
 276     """Build hooks environment for this LU.
 277
 278     @rtype: dict
 279     @return: Dictionary containing the environment that will be used for
 280       running the hooks for this LU. The keys of the dict must not be prefixed
 281       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 282       will extend the environment with additional variables. If no environment
 283       should be defined, an empty dictionary should be returned (not C{None}).
 284     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 285       will not be called.
 286
 287     """
 288     raise NotImplementedError
 289
 290   def BuildHooksNodes(self):
 291     """Build list of nodes to run LU's hooks.
 292
 293     @rtype: tuple; (list, list)
 294     @return: Tuple containing a list of node names on which the hook
 295       should run before the execution and a list of node names on which the
 296       hook should run after the execution. No nodes should be returned as an
 297       empty list (and not None).
 298     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 299       will not be called.
 300
 301     """
 302     raise NotImplementedError
 303
 304   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 305     """Notify the LU about the results of its hooks.
 306
 307     This method is called every time a hooks phase is executed, and notifies
 308     the Logical Unit about the hooks' result. The LU can then use it to alter
 309     its result based on the hooks.  By default the method does nothing and the
 310     previous result is passed back unchanged but any LU can define it if it
 311     wants to use the local cluster hook-scripts somehow.
 312
 313     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 314         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 315     @param hook_results: the results of the multi-node hooks rpc call
 316     @param feedback_fn: function used send feedback back to the caller
 317     @param lu_result: the previous Exec result this LU had, or None
 318         in the PRE phase
 319     @return: the new Exec result, based on the previous result
 320         and hook results
 321
 322     """
 323     # API must be kept, thus we ignore the unused argument and could
 324     # be a function warnings
 325     # pylint: disable=W0613,R0201
 326     return lu_result
 327
 328   def _ExpandAndLockInstance(self):
 329     """Helper function to expand and lock an instance.
 330
 331     Many LUs that work on an instance take its name in self.op.instance_name
 332     and need to expand it and then declare the expanded name for locking. This
 333     function does it, and then updates self.op.instance_name to the expanded
 334     name. It also initializes needed_locks as a dict, if this hasn't been done
 335     before.
 336
 337     """
 338     if self.needed_locks is None:
 339       self.needed_locks = {}
 340     else:
 341       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 342         "_ExpandAndLockInstance called with instance-level locks set"
 343     self.op.instance_name = _ExpandInstanceName(self.cfg,
 344                                                 self.op.instance_name)
 345     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 346
 347   def _LockInstancesNodes(self, primary_only=False):
 348     """Helper function to declare instances' nodes for locking.
 349
 350     This function should be called after locking one or more instances to lock
 351     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 352     with all primary or secondary nodes for instances already locked and
 353     present in self.needed_locks[locking.LEVEL_INSTANCE].
 354
 355     It should be called from DeclareLocks, and for safety only works if
 356     self.recalculate_locks[locking.LEVEL_NODE] is set.
 357
 358     In the future it may grow parameters to just lock some instance's nodes, or
 359     to just lock primaries or secondary nodes, if needed.
 360
 361     If should be called in DeclareLocks in a way similar to::
 362
 363       if level == locking.LEVEL_NODE:
 364         self._LockInstancesNodes()
 365
 366     @type primary_only: boolean
 367     @param primary_only: only lock primary nodes of locked instances
 368
 369     """
 370     assert locking.LEVEL_NODE in self.recalculate_locks, \
 371       "_LockInstancesNodes helper function called with no nodes to recalculate"
 372
 373     # TODO: check if we're really been called with the instance locks held
 374
 375     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 376     # future we might want to have different behaviors depending on the value
 377     # of self.recalculate_locks[locking.LEVEL_NODE]
 378     wanted_nodes = []
 379     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 380     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 381       wanted_nodes.append(instance.primary_node)
 382       if not primary_only:
 383         wanted_nodes.extend(instance.secondary_nodes)
 384
 385     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 386       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 387     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 388       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 389
 390     del self.recalculate_locks[locking.LEVEL_NODE]
 391
 392
 393 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 394   """Simple LU which runs no hooks.
 395
 396   This LU is intended as a parent for other LogicalUnits which will
 397   run no hooks, in order to reduce duplicate code.
 398
 399   """
 400   HPATH = None
 401   HTYPE = None
 402
 403   def BuildHooksEnv(self):
 404     """Empty BuildHooksEnv for NoHooksLu.
 405
 406     This just raises an error.
 407
 408     """
 409     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 410
 411   def BuildHooksNodes(self):
 412     """Empty BuildHooksNodes for NoHooksLU.
 413
 414     """
 415     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 416
 417
 418 class Tasklet:
 419   """Tasklet base class.
 420
 421   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 422   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 423   tasklets know nothing about locks.
 424
 425   Subclasses must follow these rules:
 426     - Implement CheckPrereq
 427     - Implement Exec
 428
 429   """
 430   def __init__(self, lu):
 431     self.lu = lu
 432
 433     # Shortcuts
 434     self.cfg = lu.cfg
 435     self.rpc = lu.rpc
 436
 437   def CheckPrereq(self):
 438     """Check prerequisites for this tasklets.
 439
 440     This method should check whether the prerequisites for the execution of
 441     this tasklet are fulfilled. It can do internode communication, but it
 442     should be idempotent - no cluster or system changes are allowed.
 443
 444     The method should raise errors.OpPrereqError in case something is not
 445     fulfilled. Its return value is ignored.
 446
 447     This method should also update all parameters to their canonical form if it
 448     hasn't been done before.
 449
 450     """
 451     pass
 452
 453   def Exec(self, feedback_fn):
 454     """Execute the tasklet.
 455
 456     This method should implement the actual work. It should raise
 457     errors.OpExecError for failures that are somewhat dealt with in code, or
 458     expected.
 459
 460     """
 461     raise NotImplementedError
 462
 463
 464 class _QueryBase:
 465   """Base for query utility classes.
 466
 467   """
 468   #: Attribute holding field definitions
 469   FIELDS = None
 470
 471   def __init__(self, filter_, fields, use_locking):
 472     """Initializes this class.
 473
 474     """
 475     self.use_locking = use_locking
 476
 477     self.query = query.Query(self.FIELDS, fields, filter_=filter_,
 478                              namefield="name")
 479     self.requested_data = self.query.RequestedData()
 480     self.names = self.query.RequestedNames()
 481
 482     # Sort only if no names were requested
 483     self.sort_by_name = not self.names
 484
 485     self.do_locking = None
 486     self.wanted = None
 487
 488   def _GetNames(self, lu, all_names, lock_level):
 489     """Helper function to determine names asked for in the query.
 490
 491     """
 492     if self.do_locking:
 493       names = lu.owned_locks(lock_level)
 494     else:
 495       names = all_names
 496
 497     if self.wanted == locking.ALL_SET:
 498       assert not self.names
 499       # caller didn't specify names, so ordering is not important
 500       return utils.NiceSort(names)
 501
 502     # caller specified names and we must keep the same order
 503     assert self.names
 504     assert not self.do_locking or lu.glm.is_owned(lock_level)
 505
 506     missing = set(self.wanted).difference(names)
 507     if missing:
 508       raise errors.OpExecError("Some items were removed before retrieving"
 509                                " their data: %s" % missing)
 510
 511     # Return expanded names
 512     return self.wanted
 513
 514   def ExpandNames(self, lu):
 515     """Expand names for this query.
 516
 517     See L{LogicalUnit.ExpandNames}.
 518
 519     """
 520     raise NotImplementedError()
 521
 522   def DeclareLocks(self, lu, level):
 523     """Declare locks for this query.
 524
 525     See L{LogicalUnit.DeclareLocks}.
 526
 527     """
 528     raise NotImplementedError()
 529
 530   def _GetQueryData(self, lu):
 531     """Collects all data for this query.
 532
 533     @return: Query data object
 534
 535     """
 536     raise NotImplementedError()
 537
 538   def NewStyleQuery(self, lu):
 539     """Collect data and execute query.
 540
 541     """
 542     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 543                                   sort_by_name=self.sort_by_name)
 544
 545   def OldStyleQuery(self, lu):
 546     """Collect data and execute query.
 547
 548     """
 549     return self.query.OldStyleQuery(self._GetQueryData(lu),
 550                                     sort_by_name=self.sort_by_name)
 551
 552
 553 def _ShareAll():
 554   """Returns a dict declaring all lock levels shared.
 555
 556   """
 557   return dict.fromkeys(locking.LEVELS, 1)
 558
 559
 560 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
 561   """Checks if the owned node groups are still correct for an instance.
 562
 563   @type cfg: L{config.ConfigWriter}
 564   @param cfg: The cluster configuration
 565   @type instance_name: string
 566   @param instance_name: Instance name
 567   @type owned_groups: set or frozenset
 568   @param owned_groups: List of currently owned node groups
 569
 570   """
 571   inst_groups = cfg.GetInstanceNodeGroups(instance_name)
 572
 573   if not owned_groups.issuperset(inst_groups):
 574     raise errors.OpPrereqError("Instance %s's node groups changed since"
 575                                " locks were acquired, current groups are"
 576                                " are '%s', owning groups '%s'; retry the"
 577                                " operation" %
 578                                (instance_name,
 579                                 utils.CommaJoin(inst_groups),
 580                                 utils.CommaJoin(owned_groups)),
 581                                errors.ECODE_STATE)
 582
 583   return inst_groups
 584
 585
 586 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 587   """Checks if the instances in a node group are still correct.
 588
 589   @type cfg: L{config.ConfigWriter}
 590   @param cfg: The cluster configuration
 591   @type group_uuid: string
 592   @param group_uuid: Node group UUID
 593   @type owned_instances: set or frozenset
 594   @param owned_instances: List of currently owned instances
 595
 596   """
 597   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 598   if owned_instances != wanted_instances:
 599     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 600                                " locks were acquired, wanted '%s', have '%s';"
 601                                " retry the operation" %
 602                                (group_uuid,
 603                                 utils.CommaJoin(wanted_instances),
 604                                 utils.CommaJoin(owned_instances)),
 605                                errors.ECODE_STATE)
 606
 607   return wanted_instances
 608
 609
 610 def _SupportsOob(cfg, node):
 611   """Tells if node supports OOB.
 612
 613   @type cfg: L{config.ConfigWriter}
 614   @param cfg: The cluster configuration
 615   @type node: L{objects.Node}
 616   @param node: The node
 617   @return: The OOB script if supported or an empty string otherwise
 618
 619   """
 620   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 621
 622
 623 def _GetWantedNodes(lu, nodes):
 624   """Returns list of checked and expanded node names.
 625
 626   @type lu: L{LogicalUnit}
 627   @param lu: the logical unit on whose behalf we execute
 628   @type nodes: list
 629   @param nodes: list of node names or None for all nodes
 630   @rtype: list
 631   @return: the list of nodes, sorted
 632   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 633
 634   """
 635   if nodes:
 636     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 637
 638   return utils.NiceSort(lu.cfg.GetNodeList())
 639
 640
 641 def _GetWantedInstances(lu, instances):
 642   """Returns list of checked and expanded instance names.
 643
 644   @type lu: L{LogicalUnit}
 645   @param lu: the logical unit on whose behalf we execute
 646   @type instances: list
 647   @param instances: list of instance names or None for all instances
 648   @rtype: list
 649   @return: the list of instances, sorted
 650   @raise errors.OpPrereqError: if the instances parameter is wrong type
 651   @raise errors.OpPrereqError: if any of the passed instances is not found
 652
 653   """
 654   if instances:
 655     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 656   else:
 657     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 658   return wanted
 659
 660
 661 def _GetUpdatedParams(old_params, update_dict,
 662                       use_default=True, use_none=False):
 663   """Return the new version of a parameter dictionary.
 664
 665   @type old_params: dict
 666   @param old_params: old parameters
 667   @type update_dict: dict
 668   @param update_dict: dict containing new parameter values, or
 669       constants.VALUE_DEFAULT to reset the parameter to its default
 670       value
 671   @param use_default: boolean
 672   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 673       values as 'to be deleted' values
 674   @param use_none: boolean
 675   @type use_none: whether to recognise C{None} values as 'to be
 676       deleted' values
 677   @rtype: dict
 678   @return: the new parameter dictionary
 679
 680   """
 681   params_copy = copy.deepcopy(old_params)
 682   for key, val in update_dict.iteritems():
 683     if ((use_default and val == constants.VALUE_DEFAULT) or
 684         (use_none and val is None)):
 685       try:
 686         del params_copy[key]
 687       except KeyError:
 688         pass
 689     else:
 690       params_copy[key] = val
 691   return params_copy
 692
 693
 694 def _ReleaseLocks(lu, level, names=None, keep=None):
 695   """Releases locks owned by an LU.
 696
 697   @type lu: L{LogicalUnit}
 698   @param level: Lock level
 699   @type names: list or None
 700   @param names: Names of locks to release
 701   @type keep: list or None
 702   @param keep: Names of locks to retain
 703
 704   """
 705   assert not (keep is not None and names is not None), \
 706          "Only one of the 'names' and the 'keep' parameters can be given"
 707
 708   if names is not None:
 709     should_release = names.__contains__
 710   elif keep:
 711     should_release = lambda name: name not in keep
 712   else:
 713     should_release = None
 714
 715   if should_release:
 716     retain = []
 717     release = []
 718
 719     # Determine which locks to release
 720     for name in lu.owned_locks(level):
 721       if should_release(name):
 722         release.append(name)
 723       else:
 724         retain.append(name)
 725
 726     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 727
 728     # Release just some locks
 729     lu.glm.release(level, names=release)
 730
 731     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 732   else:
 733     # Release everything
 734     lu.glm.release(level)
 735
 736     assert not lu.glm.is_owned(level), "No locks should be owned"
 737
 738
 739 def _MapInstanceDisksToNodes(instances):
 740   """Creates a map from (node, volume) to instance name.
 741
 742   @type instances: list of L{objects.Instance}
 743   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 744
 745   """
 746   return dict(((node, vol), inst.name)
 747               for inst in instances
 748               for (node, vols) in inst.MapLVsByNode().items()
 749               for vol in vols)
 750
 751
 752 def _RunPostHook(lu, node_name):
 753   """Runs the post-hook for an opcode on a single node.
 754
 755   """
 756   hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
 757   try:
 758     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 759   except:
 760     # pylint: disable=W0702
 761     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 762
 763
 764 def _CheckOutputFields(static, dynamic, selected):
 765   """Checks whether all selected fields are valid.
 766
 767   @type static: L{utils.FieldSet}
 768   @param static: static fields set
 769   @type dynamic: L{utils.FieldSet}
 770   @param dynamic: dynamic fields set
 771
 772   """
 773   f = utils.FieldSet()
 774   f.Extend(static)
 775   f.Extend(dynamic)
 776
 777   delta = f.NonMatching(selected)
 778   if delta:
 779     raise errors.OpPrereqError("Unknown output fields selected: %s"
 780                                % ",".join(delta), errors.ECODE_INVAL)
 781
 782
 783 def _CheckGlobalHvParams(params):
 784   """Validates that given hypervisor params are not global ones.
 785
 786   This will ensure that instances don't get customised versions of
 787   global params.
 788
 789   """
 790   used_globals = constants.HVC_GLOBALS.intersection(params)
 791   if used_globals:
 792     msg = ("The following hypervisor parameters are global and cannot"
 793            " be customized at instance level, please modify them at"
 794            " cluster level: %s" % utils.CommaJoin(used_globals))
 795     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 796
 797
 798 def _CheckNodeOnline(lu, node, msg=None):
 799   """Ensure that a given node is online.
 800
 801   @param lu: the LU on behalf of which we make the check
 802   @param node: the node to check
 803   @param msg: if passed, should be a message to replace the default one
 804   @raise errors.OpPrereqError: if the node is offline
 805
 806   """
 807   if msg is None:
 808     msg = "Can't use offline node"
 809   if lu.cfg.GetNodeInfo(node).offline:
 810     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 811
 812
 813 def _CheckNodeNotDrained(lu, node):
 814   """Ensure that a given node is not drained.
 815
 816   @param lu: the LU on behalf of which we make the check
 817   @param node: the node to check
 818   @raise errors.OpPrereqError: if the node is drained
 819
 820   """
 821   if lu.cfg.GetNodeInfo(node).drained:
 822     raise errors.OpPrereqError("Can't use drained node %s" % node,
 823                                errors.ECODE_STATE)
 824
 825
 826 def _CheckNodeVmCapable(lu, node):
 827   """Ensure that a given node is vm capable.
 828
 829   @param lu: the LU on behalf of which we make the check
 830   @param node: the node to check
 831   @raise errors.OpPrereqError: if the node is not vm capable
 832
 833   """
 834   if not lu.cfg.GetNodeInfo(node).vm_capable:
 835     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 836                                errors.ECODE_STATE)
 837
 838
 839 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 840   """Ensure that a node supports a given OS.
 841
 842   @param lu: the LU on behalf of which we make the check
 843   @param node: the node to check
 844   @param os_name: the OS to query about
 845   @param force_variant: whether to ignore variant errors
 846   @raise errors.OpPrereqError: if the node is not supporting the OS
 847
 848   """
 849   result = lu.rpc.call_os_get(node, os_name)
 850   result.Raise("OS '%s' not in supported OS list for node %s" %
 851                (os_name, node),
 852                prereq=True, ecode=errors.ECODE_INVAL)
 853   if not force_variant:
 854     _CheckOSVariant(result.payload, os_name)
 855
 856
 857 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 858   """Ensure that a node has the given secondary ip.
 859
 860   @type lu: L{LogicalUnit}
 861   @param lu: the LU on behalf of which we make the check
 862   @type node: string
 863   @param node: the node to check
 864   @type secondary_ip: string
 865   @param secondary_ip: the ip to check
 866   @type prereq: boolean
 867   @param prereq: whether to throw a prerequisite or an execute error
 868   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 869   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 870
 871   """
 872   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 873   result.Raise("Failure checking secondary ip on node %s" % node,
 874                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 875   if not result.payload:
 876     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 877            " please fix and re-run this command" % secondary_ip)
 878     if prereq:
 879       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 880     else:
 881       raise errors.OpExecError(msg)
 882
 883
 884 def _GetClusterDomainSecret():
 885   """Reads the cluster domain secret.
 886
 887   """
 888   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 889                                strict=True)
 890
 891
 892 def _CheckInstanceDown(lu, instance, reason):
 893   """Ensure that an instance is not running."""
 894   if instance.admin_up:
 895     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 896                                (instance.name, reason), errors.ECODE_STATE)
 897
 898   pnode = instance.primary_node
 899   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 900   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 901               prereq=True, ecode=errors.ECODE_ENVIRON)
 902
 903   if instance.name in ins_l.payload:
 904     raise errors.OpPrereqError("Instance %s is running, %s" %
 905                                (instance.name, reason), errors.ECODE_STATE)
 906
 907
 908 def _ExpandItemName(fn, name, kind):
 909   """Expand an item name.
 910
 911   @param fn: the function to use for expansion
 912   @param name: requested item name
 913   @param kind: text description ('Node' or 'Instance')
 914   @return: the resolved (full) name
 915   @raise errors.OpPrereqError: if the item is not found
 916
 917   """
 918   full_name = fn(name)
 919   if full_name is None:
 920     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 921                                errors.ECODE_NOENT)
 922   return full_name
 923
 924
 925 def _ExpandNodeName(cfg, name):
 926   """Wrapper over L{_ExpandItemName} for nodes."""
 927   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 928
 929
 930 def _ExpandInstanceName(cfg, name):
 931   """Wrapper over L{_ExpandItemName} for instance."""
 932   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 933
 934
 935 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 936                           memory, vcpus, nics, disk_template, disks,
 937                           bep, hvp, hypervisor_name, tags):
 938   """Builds instance related env variables for hooks
 939
 940   This builds the hook environment from individual variables.
 941
 942   @type name: string
 943   @param name: the name of the instance
 944   @type primary_node: string
 945   @param primary_node: the name of the instance's primary node
 946   @type secondary_nodes: list
 947   @param secondary_nodes: list of secondary nodes as strings
 948   @type os_type: string
 949   @param os_type: the name of the instance's OS
 950   @type status: boolean
 951   @param status: the should_run status of the instance
 952   @type memory: string
 953   @param memory: the memory size of the instance
 954   @type vcpus: string
 955   @param vcpus: the count of VCPUs the instance has
 956   @type nics: list
 957   @param nics: list of tuples (ip, mac, mode, link) representing
 958       the NICs the instance has
 959   @type disk_template: string
 960   @param disk_template: the disk template of the instance
 961   @type disks: list
 962   @param disks: the list of (size, mode) pairs
 963   @type bep: dict
 964   @param bep: the backend parameters for the instance
 965   @type hvp: dict
 966   @param hvp: the hypervisor parameters for the instance
 967   @type hypervisor_name: string
 968   @param hypervisor_name: the hypervisor for the instance
 969   @type tags: list
 970   @param tags: list of instance tags as strings
 971   @rtype: dict
 972   @return: the hook environment for this instance
 973
 974   """
 975   if status:
 976     str_status = "up"
 977   else:
 978     str_status = "down"
 979   env = {
 980     "OP_TARGET": name,
 981     "INSTANCE_NAME": name,
 982     "INSTANCE_PRIMARY": primary_node,
 983     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 984     "INSTANCE_OS_TYPE": os_type,
 985     "INSTANCE_STATUS": str_status,
 986     "INSTANCE_MEMORY": memory,
 987     "INSTANCE_VCPUS": vcpus,
 988     "INSTANCE_DISK_TEMPLATE": disk_template,
 989     "INSTANCE_HYPERVISOR": hypervisor_name,
 990   }
 991
 992   if nics:
 993     nic_count = len(nics)
 994     for idx, (ip, mac, mode, link) in enumerate(nics):
 995       if ip is None:
 996         ip = ""
 997       env["INSTANCE_NIC%d_IP" % idx] = ip
 998       env["INSTANCE_NIC%d_MAC" % idx] = mac
 999       env["INSTANCE_NIC%d_MODE" % idx] = mode
1000       env["INSTANCE_NIC%d_LINK" % idx] = link
1001       if mode == constants.NIC_MODE_BRIDGED:
1002         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1003   else:
1004     nic_count = 0
1005
1006   env["INSTANCE_NIC_COUNT"] = nic_count
1007
1008   if disks:
1009     disk_count = len(disks)
1010     for idx, (size, mode) in enumerate(disks):
1011       env["INSTANCE_DISK%d_SIZE" % idx] = size
1012       env["INSTANCE_DISK%d_MODE" % idx] = mode
1013   else:
1014     disk_count = 0
1015
1016   env["INSTANCE_DISK_COUNT"] = disk_count
1017
1018   if not tags:
1019     tags = []
1020
1021   env["INSTANCE_TAGS"] = " ".join(tags)
1022
1023   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1024     for key, value in source.items():
1025       env["INSTANCE_%s_%s" % (kind, key)] = value
1026
1027   return env
1028
1029
1030 def _NICListToTuple(lu, nics):
1031   """Build a list of nic information tuples.
1032
1033   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1034   value in LUInstanceQueryData.
1035
1036   @type lu:  L{LogicalUnit}
1037   @param lu: the logical unit on whose behalf we execute
1038   @type nics: list of L{objects.NIC}
1039   @param nics: list of nics to convert to hooks tuples
1040
1041   """
1042   hooks_nics = []
1043   cluster = lu.cfg.GetClusterInfo()
1044   for nic in nics:
1045     ip = nic.ip
1046     mac = nic.mac
1047     filled_params = cluster.SimpleFillNIC(nic.nicparams)
1048     mode = filled_params[constants.NIC_MODE]
1049     link = filled_params[constants.NIC_LINK]
1050     hooks_nics.append((ip, mac, mode, link))
1051   return hooks_nics
1052
1053
1054 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1055   """Builds instance related env variables for hooks from an object.
1056
1057   @type lu: L{LogicalUnit}
1058   @param lu: the logical unit on whose behalf we execute
1059   @type instance: L{objects.Instance}
1060   @param instance: the instance for which we should build the
1061       environment
1062   @type override: dict
1063   @param override: dictionary with key/values that will override
1064       our values
1065   @rtype: dict
1066   @return: the hook environment dictionary
1067
1068   """
1069   cluster = lu.cfg.GetClusterInfo()
1070   bep = cluster.FillBE(instance)
1071   hvp = cluster.FillHV(instance)
1072   args = {
1073     "name": instance.name,
1074     "primary_node": instance.primary_node,
1075     "secondary_nodes": instance.secondary_nodes,
1076     "os_type": instance.os,
1077     "status": instance.admin_up,
1078     "memory": bep[constants.BE_MEMORY],
1079     "vcpus": bep[constants.BE_VCPUS],
1080     "nics": _NICListToTuple(lu, instance.nics),
1081     "disk_template": instance.disk_template,
1082     "disks": [(disk.size, disk.mode) for disk in instance.disks],
1083     "bep": bep,
1084     "hvp": hvp,
1085     "hypervisor_name": instance.hypervisor,
1086     "tags": instance.tags,
1087   }
1088   if override:
1089     args.update(override)
1090   return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1091
1092
1093 def _AdjustCandidatePool(lu, exceptions):
1094   """Adjust the candidate pool after node operations.
1095
1096   """
1097   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1098   if mod_list:
1099     lu.LogInfo("Promoted nodes to master candidate role: %s",
1100                utils.CommaJoin(node.name for node in mod_list))
1101     for name in mod_list:
1102       lu.context.ReaddNode(name)
1103   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1104   if mc_now > mc_max:
1105     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1106                (mc_now, mc_max))
1107
1108
1109 def _DecideSelfPromotion(lu, exceptions=None):
1110   """Decide whether I should promote myself as a master candidate.
1111
1112   """
1113   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1114   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1115   # the new node will increase mc_max with one, so:
1116   mc_should = min(mc_should + 1, cp_size)
1117   return mc_now < mc_should
1118
1119
1120 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1121   """Check that the brigdes needed by a list of nics exist.
1122
1123   """
1124   cluster = lu.cfg.GetClusterInfo()
1125   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1126   brlist = [params[constants.NIC_LINK] for params in paramslist
1127             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1128   if brlist:
1129     result = lu.rpc.call_bridges_exist(target_node, brlist)
1130     result.Raise("Error checking bridges on destination node '%s'" %
1131                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1132
1133
1134 def _CheckInstanceBridgesExist(lu, instance, node=None):
1135   """Check that the brigdes needed by an instance exist.
1136
1137   """
1138   if node is None:
1139     node = instance.primary_node
1140   _CheckNicsBridgesExist(lu, instance.nics, node)
1141
1142
1143 def _CheckOSVariant(os_obj, name):
1144   """Check whether an OS name conforms to the os variants specification.
1145
1146   @type os_obj: L{objects.OS}
1147   @param os_obj: OS object to check
1148   @type name: string
1149   @param name: OS name passed by the user, to check for validity
1150
1151   """
1152   variant = objects.OS.GetVariant(name)
1153   if not os_obj.supported_variants:
1154     if variant:
1155       raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1156                                  " passed)" % (os_obj.name, variant),
1157                                  errors.ECODE_INVAL)
1158     return
1159   if not variant:
1160     raise errors.OpPrereqError("OS name must include a variant",
1161                                errors.ECODE_INVAL)
1162
1163   if variant not in os_obj.supported_variants:
1164     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1165
1166
1167 def _GetNodeInstancesInner(cfg, fn):
1168   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1169
1170
1171 def _GetNodeInstances(cfg, node_name):
1172   """Returns a list of all primary and secondary instances on a node.
1173
1174   """
1175
1176   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1177
1178
1179 def _GetNodePrimaryInstances(cfg, node_name):
1180   """Returns primary instances on a node.
1181
1182   """
1183   return _GetNodeInstancesInner(cfg,
1184                                 lambda inst: node_name == inst.primary_node)
1185
1186
1187 def _GetNodeSecondaryInstances(cfg, node_name):
1188   """Returns secondary instances on a node.
1189
1190   """
1191   return _GetNodeInstancesInner(cfg,
1192                                 lambda inst: node_name in inst.secondary_nodes)
1193
1194
1195 def _GetStorageTypeArgs(cfg, storage_type):
1196   """Returns the arguments for a storage type.
1197
1198   """
1199   # Special case for file storage
1200   if storage_type == constants.ST_FILE:
1201     # storage.FileStorage wants a list of storage directories
1202     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1203
1204   return []
1205
1206
1207 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1208   faulty = []
1209
1210   for dev in instance.disks:
1211     cfg.SetDiskID(dev, node_name)
1212
1213   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1214   result.Raise("Failed to get disk status from node %s" % node_name,
1215                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1216
1217   for idx, bdev_status in enumerate(result.payload):
1218     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1219       faulty.append(idx)
1220
1221   return faulty
1222
1223
1224 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1225   """Check the sanity of iallocator and node arguments and use the
1226   cluster-wide iallocator if appropriate.
1227
1228   Check that at most one of (iallocator, node) is specified. If none is
1229   specified, then the LU's opcode's iallocator slot is filled with the
1230   cluster-wide default iallocator.
1231
1232   @type iallocator_slot: string
1233   @param iallocator_slot: the name of the opcode iallocator slot
1234   @type node_slot: string
1235   @param node_slot: the name of the opcode target node slot
1236
1237   """
1238   node = getattr(lu.op, node_slot, None)
1239   iallocator = getattr(lu.op, iallocator_slot, None)
1240
1241   if node is not None and iallocator is not None:
1242     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1243                                errors.ECODE_INVAL)
1244   elif node is None and iallocator is None:
1245     default_iallocator = lu.cfg.GetDefaultIAllocator()
1246     if default_iallocator:
1247       setattr(lu.op, iallocator_slot, default_iallocator)
1248     else:
1249       raise errors.OpPrereqError("No iallocator or node given and no"
1250                                  " cluster-wide default iallocator found;"
1251                                  " please specify either an iallocator or a"
1252                                  " node, or set a cluster-wide default"
1253                                  " iallocator")
1254
1255
1256 def _GetDefaultIAllocator(cfg, iallocator):
1257   """Decides on which iallocator to use.
1258
1259   @type cfg: L{config.ConfigWriter}
1260   @param cfg: Cluster configuration object
1261   @type iallocator: string or None
1262   @param iallocator: Iallocator specified in opcode
1263   @rtype: string
1264   @return: Iallocator name
1265
1266   """
1267   if not iallocator:
1268     # Use default iallocator
1269     iallocator = cfg.GetDefaultIAllocator()
1270
1271   if not iallocator:
1272     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1273                                " opcode nor as a cluster-wide default",
1274                                errors.ECODE_INVAL)
1275
1276   return iallocator
1277
1278
1279 class LUClusterPostInit(LogicalUnit):
1280   """Logical unit for running hooks after cluster initialization.
1281
1282   """
1283   HPATH = "cluster-init"
1284   HTYPE = constants.HTYPE_CLUSTER
1285
1286   def BuildHooksEnv(self):
1287     """Build hooks env.
1288
1289     """
1290     return {
1291       "OP_TARGET": self.cfg.GetClusterName(),
1292       }
1293
1294   def BuildHooksNodes(self):
1295     """Build hooks nodes.
1296
1297     """
1298     return ([], [self.cfg.GetMasterNode()])
1299
1300   def Exec(self, feedback_fn):
1301     """Nothing to do.
1302
1303     """
1304     return True
1305
1306
1307 class LUClusterDestroy(LogicalUnit):
1308   """Logical unit for destroying the cluster.
1309
1310   """
1311   HPATH = "cluster-destroy"
1312   HTYPE = constants.HTYPE_CLUSTER
1313
1314   def BuildHooksEnv(self):
1315     """Build hooks env.
1316
1317     """
1318     return {
1319       "OP_TARGET": self.cfg.GetClusterName(),
1320       }
1321
1322   def BuildHooksNodes(self):
1323     """Build hooks nodes.
1324
1325     """
1326     return ([], [])
1327
1328   def CheckPrereq(self):
1329     """Check prerequisites.
1330
1331     This checks whether the cluster is empty.
1332
1333     Any errors are signaled by raising errors.OpPrereqError.
1334
1335     """
1336     master = self.cfg.GetMasterNode()
1337
1338     nodelist = self.cfg.GetNodeList()
1339     if len(nodelist) != 1 or nodelist[0] != master:
1340       raise errors.OpPrereqError("There are still %d node(s) in"
1341                                  " this cluster." % (len(nodelist) - 1),
1342                                  errors.ECODE_INVAL)
1343     instancelist = self.cfg.GetInstanceList()
1344     if instancelist:
1345       raise errors.OpPrereqError("There are still %d instance(s) in"
1346                                  " this cluster." % len(instancelist),
1347                                  errors.ECODE_INVAL)
1348
1349   def Exec(self, feedback_fn):
1350     """Destroys the cluster.
1351
1352     """
1353     master = self.cfg.GetMasterNode()
1354
1355     # Run post hooks on master node before it's removed
1356     _RunPostHook(self, master)
1357
1358     result = self.rpc.call_node_stop_master(master, False)
1359     result.Raise("Could not disable the master role")
1360
1361     return master
1362
1363
1364 def _VerifyCertificate(filename):
1365   """Verifies a certificate for L{LUClusterVerifyConfig}.
1366
1367   @type filename: string
1368   @param filename: Path to PEM file
1369
1370   """
1371   try:
1372     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1373                                            utils.ReadFile(filename))
1374   except Exception, err: # pylint: disable=W0703
1375     return (LUClusterVerifyConfig.ETYPE_ERROR,
1376             "Failed to load X509 certificate %s: %s" % (filename, err))
1377
1378   (errcode, msg) = \
1379     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1380                                 constants.SSL_CERT_EXPIRATION_ERROR)
1381
1382   if msg:
1383     fnamemsg = "While verifying %s: %s" % (filename, msg)
1384   else:
1385     fnamemsg = None
1386
1387   if errcode is None:
1388     return (None, fnamemsg)
1389   elif errcode == utils.CERT_WARNING:
1390     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1391   elif errcode == utils.CERT_ERROR:
1392     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1393
1394   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1395
1396
1397 def _GetAllHypervisorParameters(cluster, instances):
1398   """Compute the set of all hypervisor parameters.
1399
1400   @type cluster: L{objects.Cluster}
1401   @param cluster: the cluster object
1402   @param instances: list of L{objects.Instance}
1403   @param instances: additional instances from which to obtain parameters
1404   @rtype: list of (origin, hypervisor, parameters)
1405   @return: a list with all parameters found, indicating the hypervisor they
1406        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1407
1408   """
1409   hvp_data = []
1410
1411   for hv_name in cluster.enabled_hypervisors:
1412     hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1413
1414   for os_name, os_hvp in cluster.os_hvp.items():
1415     for hv_name, hv_params in os_hvp.items():
1416       if hv_params:
1417         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1418         hvp_data.append(("os %s" % os_name, hv_name, full_params))
1419
1420   # TODO: collapse identical parameter values in a single one
1421   for instance in instances:
1422     if instance.hvparams:
1423       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1424                        cluster.FillHV(instance)))
1425
1426   return hvp_data
1427
1428
1429 class _VerifyErrors(object):
1430   """Mix-in for cluster/group verify LUs.
1431
1432   It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1433   self.op and self._feedback_fn to be available.)
1434
1435   """
1436   TCLUSTER = "cluster"
1437   TNODE = "node"
1438   TINSTANCE = "instance"
1439
1440   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1441   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1442   ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1443   ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1444   ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1445   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1446   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1447   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1448   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1449   EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1450   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1451   EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1452   ENODEDRBD = (TNODE, "ENODEDRBD")
1453   ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1454   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1455   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1456   ENODEHV = (TNODE, "ENODEHV")
1457   ENODELVM = (TNODE, "ENODELVM")
1458   ENODEN1 = (TNODE, "ENODEN1")
1459   ENODENET = (TNODE, "ENODENET")
1460   ENODEOS = (TNODE, "ENODEOS")
1461   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1462   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1463   ENODERPC = (TNODE, "ENODERPC")
1464   ENODESSH = (TNODE, "ENODESSH")
1465   ENODEVERSION = (TNODE, "ENODEVERSION")
1466   ENODESETUP = (TNODE, "ENODESETUP")
1467   ENODETIME = (TNODE, "ENODETIME")
1468   ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1469
1470   ETYPE_FIELD = "code"
1471   ETYPE_ERROR = "ERROR"
1472   ETYPE_WARNING = "WARNING"
1473
1474   def _Error(self, ecode, item, msg, *args, **kwargs):
1475     """Format an error message.
1476
1477     Based on the opcode's error_codes parameter, either format a
1478     parseable error code, or a simpler error string.
1479
1480     This must be called only from Exec and functions called from Exec.
1481
1482     """
1483     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1484     itype, etxt = ecode
1485     # first complete the msg
1486     if args:
1487       msg = msg % args
1488     # then format the whole message
1489     if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1490       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1491     else:
1492       if item:
1493         item = " " + item
1494       else:
1495         item = ""
1496       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1497     # and finally report it via the feedback_fn
1498     self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1499
1500   def _ErrorIf(self, cond, *args, **kwargs):
1501     """Log an error message if the passed condition is True.
1502
1503     """
1504     cond = (bool(cond)
1505             or self.op.debug_simulate_errors) # pylint: disable=E1101
1506     if cond:
1507       self._Error(*args, **kwargs)
1508     # do not mark the operation as failed for WARN cases only
1509     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1510       self.bad = self.bad or cond
1511
1512
1513 class LUClusterVerify(NoHooksLU):
1514   """Submits all jobs necessary to verify the cluster.
1515
1516   """
1517   REQ_BGL = False
1518
1519   def ExpandNames(self):
1520     self.needed_locks = {}
1521
1522   def Exec(self, feedback_fn):
1523     jobs = []
1524
1525     if self.op.group_name:
1526       groups = [self.op.group_name]
1527       depends_fn = lambda: None
1528     else:
1529       groups = self.cfg.GetNodeGroupList()
1530
1531       # Verify global configuration
1532       jobs.append([opcodes.OpClusterVerifyConfig()])
1533
1534       # Always depend on global verification
1535       depends_fn = lambda: [(-len(jobs), [])]
1536
1537     jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1538                                               depends=depends_fn())]
1539                 for group in groups)
1540
1541     # Fix up all parameters
1542     for op in itertools.chain(*jobs): # pylint: disable=W0142
1543       op.debug_simulate_errors = self.op.debug_simulate_errors
1544       op.verbose = self.op.verbose
1545       op.error_codes = self.op.error_codes
1546       try:
1547         op.skip_checks = self.op.skip_checks
1548       except AttributeError:
1549         assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1550
1551     return ResultWithJobs(jobs)
1552
1553
1554 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1555   """Verifies the cluster config.
1556
1557   """
1558   REQ_BGL = False
1559
1560   def _VerifyHVP(self, hvp_data):
1561     """Verifies locally the syntax of the hypervisor parameters.
1562
1563     """
1564     for item, hv_name, hv_params in hvp_data:
1565       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1566              (item, hv_name))
1567       try:
1568         hv_class = hypervisor.GetHypervisor(hv_name)
1569         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1570         hv_class.CheckParameterSyntax(hv_params)
1571       except errors.GenericError, err:
1572         self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1573
1574   def ExpandNames(self):
1575     self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1576     self.share_locks = _ShareAll()
1577
1578   def CheckPrereq(self):
1579     """Check prerequisites.
1580
1581     """
1582     # Retrieve all information
1583     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1584     self.all_node_info = self.cfg.GetAllNodesInfo()
1585     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1586
1587   def Exec(self, feedback_fn):
1588     """Verify integrity of cluster, performing various test on nodes.
1589
1590     """
1591     self.bad = False
1592     self._feedback_fn = feedback_fn
1593
1594     feedback_fn("* Verifying cluster config")
1595
1596     for msg in self.cfg.VerifyConfig():
1597       self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1598
1599     feedback_fn("* Verifying cluster certificate files")
1600
1601     for cert_filename in constants.ALL_CERT_FILES:
1602       (errcode, msg) = _VerifyCertificate(cert_filename)
1603       self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1604
1605     feedback_fn("* Verifying hypervisor parameters")
1606
1607     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1608                                                 self.all_inst_info.values()))
1609
1610     feedback_fn("* Verifying all nodes belong to an existing group")
1611
1612     # We do this verification here because, should this bogus circumstance
1613     # occur, it would never be caught by VerifyGroup, which only acts on
1614     # nodes/instances reachable from existing node groups.
1615
1616     dangling_nodes = set(node.name for node in self.all_node_info.values()
1617                          if node.group not in self.all_group_info)
1618
1619     dangling_instances = {}
1620     no_node_instances = []
1621
1622     for inst in self.all_inst_info.values():
1623       if inst.primary_node in dangling_nodes:
1624         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1625       elif inst.primary_node not in self.all_node_info:
1626         no_node_instances.append(inst.name)
1627
1628     pretty_dangling = [
1629         "%s (%s)" %
1630         (node.name,
1631          utils.CommaJoin(dangling_instances.get(node.name,
1632                                                 ["no instances"])))
1633         for node in dangling_nodes]
1634
1635     self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1636                   "the following nodes (and their instances) belong to a non"
1637                   " existing group: %s", utils.CommaJoin(pretty_dangling))
1638
1639     self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1640                   "the following instances have a non-existing primary-node:"
1641                   " %s", utils.CommaJoin(no_node_instances))
1642
1643     return not self.bad
1644
1645
1646 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1647   """Verifies the status of a node group.
1648
1649   """
1650   HPATH = "cluster-verify"
1651   HTYPE = constants.HTYPE_CLUSTER
1652   REQ_BGL = False
1653
1654   _HOOKS_INDENT_RE = re.compile("^", re.M)
1655
1656   class NodeImage(object):
1657     """A class representing the logical and physical status of a node.
1658
1659     @type name: string
1660     @ivar name: the node name to which this object refers
1661     @ivar volumes: a structure as returned from
1662         L{ganeti.backend.GetVolumeList} (runtime)
1663     @ivar instances: a list of running instances (runtime)
1664     @ivar pinst: list of configured primary instances (config)
1665     @ivar sinst: list of configured secondary instances (config)
1666     @ivar sbp: dictionary of {primary-node: list of instances} for all
1667         instances for which this node is secondary (config)
1668     @ivar mfree: free memory, as reported by hypervisor (runtime)
1669     @ivar dfree: free disk, as reported by the node (runtime)
1670     @ivar offline: the offline status (config)
1671     @type rpc_fail: boolean
1672     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1673         not whether the individual keys were correct) (runtime)
1674     @type lvm_fail: boolean
1675     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1676     @type hyp_fail: boolean
1677     @ivar hyp_fail: whether the RPC call didn't return the instance list
1678     @type ghost: boolean
1679     @ivar ghost: whether this is a known node or not (config)
1680     @type os_fail: boolean
1681     @ivar os_fail: whether the RPC call didn't return valid OS data
1682     @type oslist: list
1683     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1684     @type vm_capable: boolean
1685     @ivar vm_capable: whether the node can host instances
1686
1687     """
1688     def __init__(self, offline=False, name=None, vm_capable=True):
1689       self.name = name
1690       self.volumes = {}
1691       self.instances = []
1692       self.pinst = []
1693       self.sinst = []
1694       self.sbp = {}
1695       self.mfree = 0
1696       self.dfree = 0
1697       self.offline = offline
1698       self.vm_capable = vm_capable
1699       self.rpc_fail = False
1700       self.lvm_fail = False
1701       self.hyp_fail = False
1702       self.ghost = False
1703       self.os_fail = False
1704       self.oslist = {}
1705
1706   def ExpandNames(self):
1707     # This raises errors.OpPrereqError on its own:
1708     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1709
1710     # Get instances in node group; this is unsafe and needs verification later
1711     inst_names = \
1712       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
1713
1714     self.needed_locks = {
1715       locking.LEVEL_INSTANCE: inst_names,
1716       locking.LEVEL_NODEGROUP: [self.group_uuid],
1717       locking.LEVEL_NODE: [],
1718       }
1719
1720     self.share_locks = _ShareAll()
1721
1722   def DeclareLocks(self, level):
1723     if level == locking.LEVEL_NODE:
1724       # Get members of node group; this is unsafe and needs verification later
1725       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1726
1727       all_inst_info = self.cfg.GetAllInstancesInfo()
1728
1729       # In Exec(), we warn about mirrored instances that have primary and
1730       # secondary living in separate node groups. To fully verify that
1731       # volumes for these instances are healthy, we will need to do an
1732       # extra call to their secondaries. We ensure here those nodes will
1733       # be locked.
1734       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1735         # Important: access only the instances whose lock is owned
1736         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1737           nodes.update(all_inst_info[inst].secondary_nodes)
1738
1739       self.needed_locks[locking.LEVEL_NODE] = nodes
1740
1741   def CheckPrereq(self):
1742     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1743     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1744
1745     group_nodes = set(self.group_info.members)
1746     group_instances = \
1747       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
1748
1749     unlocked_nodes = \
1750         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1751
1752     unlocked_instances = \
1753         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1754
1755     if unlocked_nodes:
1756       raise errors.OpPrereqError("Missing lock for nodes: %s" %
1757                                  utils.CommaJoin(unlocked_nodes),
1758                                  errors.ECODE_STATE)
1759
1760     if unlocked_instances:
1761       raise errors.OpPrereqError("Missing lock for instances: %s" %
1762                                  utils.CommaJoin(unlocked_instances),
1763                                  errors.ECODE_STATE)
1764
1765     self.all_node_info = self.cfg.GetAllNodesInfo()
1766     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1767
1768     self.my_node_names = utils.NiceSort(group_nodes)
1769     self.my_inst_names = utils.NiceSort(group_instances)
1770
1771     self.my_node_info = dict((name, self.all_node_info[name])
1772                              for name in self.my_node_names)
1773
1774     self.my_inst_info = dict((name, self.all_inst_info[name])
1775                              for name in self.my_inst_names)
1776
1777     # We detect here the nodes that will need the extra RPC calls for verifying
1778     # split LV volumes; they should be locked.
1779     extra_lv_nodes = set()
1780
1781     for inst in self.my_inst_info.values():
1782       if inst.disk_template in constants.DTS_INT_MIRROR:
1783         for nname in inst.all_nodes:
1784           if self.all_node_info[nname].group != self.group_uuid:
1785             extra_lv_nodes.add(nname)
1786
1787     unlocked_lv_nodes = \
1788         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1789
1790     if unlocked_lv_nodes:
1791       raise errors.OpPrereqError("Missing node locks for LV check: %s" %
1792                                  utils.CommaJoin(unlocked_lv_nodes),
1793                                  errors.ECODE_STATE)
1794     self.extra_lv_nodes = list(extra_lv_nodes)
1795
1796   def _VerifyNode(self, ninfo, nresult):
1797     """Perform some basic validation on data returned from a node.
1798
1799       - check the result data structure is well formed and has all the
1800         mandatory fields
1801       - check ganeti version
1802
1803     @type ninfo: L{objects.Node}
1804     @param ninfo: the node to check
1805     @param nresult: the results from the node
1806     @rtype: boolean
1807     @return: whether overall this call was successful (and we can expect
1808          reasonable values in the respose)
1809
1810     """
1811     node = ninfo.name
1812     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1813
1814     # main result, nresult should be a non-empty dict
1815     test = not nresult or not isinstance(nresult, dict)
1816     _ErrorIf(test, self.ENODERPC, node,
1817                   "unable to verify node: no data returned")
1818     if test:
1819       return False
1820
1821     # compares ganeti version
1822     local_version = constants.PROTOCOL_VERSION
1823     remote_version = nresult.get("version", None)
1824     test = not (remote_version and
1825                 isinstance(remote_version, (list, tuple)) and
1826                 len(remote_version) == 2)
1827     _ErrorIf(test, self.ENODERPC, node,
1828              "connection to node returned invalid data")
1829     if test:
1830       return False
1831
1832     test = local_version != remote_version[0]
1833     _ErrorIf(test, self.ENODEVERSION, node,
1834              "incompatible protocol versions: master %s,"
1835              " node %s", local_version, remote_version[0])
1836     if test:
1837       return False
1838
1839     # node seems compatible, we can actually try to look into its results
1840
1841     # full package version
1842     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1843                   self.ENODEVERSION, node,
1844                   "software version mismatch: master %s, node %s",
1845                   constants.RELEASE_VERSION, remote_version[1],
1846                   code=self.ETYPE_WARNING)
1847
1848     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1849     if ninfo.vm_capable and isinstance(hyp_result, dict):
1850       for hv_name, hv_result in hyp_result.iteritems():
1851         test = hv_result is not None
1852         _ErrorIf(test, self.ENODEHV, node,
1853                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1854
1855     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1856     if ninfo.vm_capable and isinstance(hvp_result, list):
1857       for item, hv_name, hv_result in hvp_result:
1858         _ErrorIf(True, self.ENODEHV, node,
1859                  "hypervisor %s parameter verify failure (source %s): %s",
1860                  hv_name, item, hv_result)
1861
1862     test = nresult.get(constants.NV_NODESETUP,
1863                        ["Missing NODESETUP results"])
1864     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1865              "; ".join(test))
1866
1867     return True
1868
1869   def _VerifyNodeTime(self, ninfo, nresult,
1870                       nvinfo_starttime, nvinfo_endtime):
1871     """Check the node time.
1872
1873     @type ninfo: L{objects.Node}
1874     @param ninfo: the node to check
1875     @param nresult: the remote results for the node
1876     @param nvinfo_starttime: the start time of the RPC call
1877     @param nvinfo_endtime: the end time of the RPC call
1878
1879     """
1880     node = ninfo.name
1881     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1882
1883     ntime = nresult.get(constants.NV_TIME, None)
1884     try:
1885       ntime_merged = utils.MergeTime(ntime)
1886     except (ValueError, TypeError):
1887       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1888       return
1889
1890     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1891       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1892     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1893       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1894     else:
1895       ntime_diff = None
1896
1897     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1898              "Node time diverges by at least %s from master node time",
1899              ntime_diff)
1900
1901   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1902     """Check the node LVM results.
1903
1904     @type ninfo: L{objects.Node}
1905     @param ninfo: the node to check
1906     @param nresult: the remote results for the node
1907     @param vg_name: the configured VG name
1908
1909     """
1910     if vg_name is None:
1911       return
1912
1913     node = ninfo.name
1914     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1915
1916     # checks vg existence and size > 20G
1917     vglist = nresult.get(constants.NV_VGLIST, None)
1918     test = not vglist
1919     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1920     if not test:
1921       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1922                                             constants.MIN_VG_SIZE)
1923       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1924
1925     # check pv names
1926     pvlist = nresult.get(constants.NV_PVLIST, None)
1927     test = pvlist is None
1928     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1929     if not test:
1930       # check that ':' is not present in PV names, since it's a
1931       # special character for lvcreate (denotes the range of PEs to
1932       # use on the PV)
1933       for _, pvname, owner_vg in pvlist:
1934         test = ":" in pvname
1935         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1936                  " '%s' of VG '%s'", pvname, owner_vg)
1937
1938   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1939     """Check the node bridges.
1940
1941     @type ninfo: L{objects.Node}
1942     @param ninfo: the node to check
1943     @param nresult: the remote results for the node
1944     @param bridges: the expected list of bridges
1945
1946     """
1947     if not bridges:
1948       return
1949
1950     node = ninfo.name
1951     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1952
1953     missing = nresult.get(constants.NV_BRIDGES, None)
1954     test = not isinstance(missing, list)
1955     _ErrorIf(test, self.ENODENET, node,
1956              "did not return valid bridge information")
1957     if not test:
1958       _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1959                utils.CommaJoin(sorted(missing)))
1960
1961   def _VerifyNodeNetwork(self, ninfo, nresult):
1962     """Check the node network connectivity results.
1963
1964     @type ninfo: L{objects.Node}
1965     @param ninfo: the node to check
1966     @param nresult: the remote results for the node
1967
1968     """
1969     node = ninfo.name
1970     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1971
1972     test = constants.NV_NODELIST not in nresult
1973     _ErrorIf(test, self.ENODESSH, node,
1974              "node hasn't returned node ssh connectivity data")
1975     if not test:
1976       if nresult[constants.NV_NODELIST]:
1977         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1978           _ErrorIf(True, self.ENODESSH, node,
1979                    "ssh communication with node '%s': %s", a_node, a_msg)
1980
1981     test = constants.NV_NODENETTEST not in nresult
1982     _ErrorIf(test, self.ENODENET, node,
1983              "node hasn't returned node tcp connectivity data")
1984     if not test:
1985       if nresult[constants.NV_NODENETTEST]:
1986         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1987         for anode in nlist:
1988           _ErrorIf(True, self.ENODENET, node,
1989                    "tcp communication with node '%s': %s",
1990                    anode, nresult[constants.NV_NODENETTEST][anode])
1991
1992     test = constants.NV_MASTERIP not in nresult
1993     _ErrorIf(test, self.ENODENET, node,
1994              "node hasn't returned node master IP reachability data")
1995     if not test:
1996       if not nresult[constants.NV_MASTERIP]:
1997         if node == self.master_node:
1998           msg = "the master node cannot reach the master IP (not configured?)"
1999         else:
2000           msg = "cannot reach the master IP"
2001         _ErrorIf(True, self.ENODENET, node, msg)
2002
2003   def _VerifyInstance(self, instance, instanceconfig, node_image,
2004                       diskstatus):
2005     """Verify an instance.
2006
2007     This function checks to see if the required block devices are
2008     available on the instance's node.
2009
2010     """
2011     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2012     node_current = instanceconfig.primary_node
2013
2014     node_vol_should = {}
2015     instanceconfig.MapLVsByNode(node_vol_should)
2016
2017     for node in node_vol_should:
2018       n_img = node_image[node]
2019       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2020         # ignore missing volumes on offline or broken nodes
2021         continue
2022       for volume in node_vol_should[node]:
2023         test = volume not in n_img.volumes
2024         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
2025                  "volume %s missing on node %s", volume, node)
2026
2027     if instanceconfig.admin_up:
2028       pri_img = node_image[node_current]
2029       test = instance not in pri_img.instances and not pri_img.offline
2030       _ErrorIf(test, self.EINSTANCEDOWN, instance,
2031                "instance not running on its primary node %s",
2032                node_current)
2033
2034     diskdata = [(nname, success, status, idx)
2035                 for (nname, disks) in diskstatus.items()
2036                 for idx, (success, status) in enumerate(disks)]
2037
2038     for nname, success, bdev_status, idx in diskdata:
2039       # the 'ghost node' construction in Exec() ensures that we have a
2040       # node here
2041       snode = node_image[nname]
2042       bad_snode = snode.ghost or snode.offline
2043       _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2044                self.EINSTANCEFAULTYDISK, instance,
2045                "couldn't retrieve status for disk/%s on %s: %s",
2046                idx, nname, bdev_status)
2047       _ErrorIf((instanceconfig.admin_up and success and
2048                 bdev_status.ldisk_status == constants.LDS_FAULTY),
2049                self.EINSTANCEFAULTYDISK, instance,
2050                "disk/%s on %s is faulty", idx, nname)
2051
2052   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2053     """Verify if there are any unknown volumes in the cluster.
2054
2055     The .os, .swap and backup volumes are ignored. All other volumes are
2056     reported as unknown.
2057
2058     @type reserved: L{ganeti.utils.FieldSet}
2059     @param reserved: a FieldSet of reserved volume names
2060
2061     """
2062     for node, n_img in node_image.items():
2063       if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2064           self.all_node_info[node].group != self.group_uuid):
2065         # skip non-healthy nodes
2066         continue
2067       for volume in n_img.volumes:
2068         test = ((node not in node_vol_should or
2069                 volume not in node_vol_should[node]) and
2070                 not reserved.Matches(volume))
2071         self._ErrorIf(test, self.ENODEORPHANLV, node,
2072                       "volume %s is unknown", volume)
2073
2074   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2075     """Verify N+1 Memory Resilience.
2076
2077     Check that if one single node dies we can still start all the
2078     instances it was primary for.
2079
2080     """
2081     cluster_info = self.cfg.GetClusterInfo()
2082     for node, n_img in node_image.items():
2083       # This code checks that every node which is now listed as
2084       # secondary has enough memory to host all instances it is
2085       # supposed to should a single other node in the cluster fail.
2086       # FIXME: not ready for failover to an arbitrary node
2087       # FIXME: does not support file-backed instances
2088       # WARNING: we currently take into account down instances as well
2089       # as up ones, considering that even if they're down someone
2090       # might want to start them even in the event of a node failure.
2091       if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2092         # we're skipping nodes marked offline and nodes in other groups from
2093         # the N+1 warning, since most likely we don't have good memory
2094         # infromation from them; we already list instances living on such
2095         # nodes, and that's enough warning
2096         continue
2097       for prinode, instances in n_img.sbp.items():
2098         needed_mem = 0
2099         for instance in instances:
2100           bep = cluster_info.FillBE(instance_cfg[instance])
2101           if bep[constants.BE_AUTO_BALANCE]:
2102             needed_mem += bep[constants.BE_MEMORY]
2103         test = n_img.mfree < needed_mem
2104         self._ErrorIf(test, self.ENODEN1, node,
2105                       "not enough memory to accomodate instance failovers"
2106                       " should node %s fail (%dMiB needed, %dMiB available)",
2107                       prinode, needed_mem, n_img.mfree)
2108
2109   @classmethod
2110   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2111                    (files_all, files_all_opt, files_mc, files_vm)):
2112     """Verifies file checksums collected from all nodes.
2113
2114     @param errorif: Callback for reporting errors
2115     @param nodeinfo: List of L{objects.Node} objects
2116     @param master_node: Name of master node
2117     @param all_nvinfo: RPC results
2118
2119     """
2120     assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2121             sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2122            "Found file listed in more than one file list"
2123
2124     # Define functions determining which nodes to consider for a file
2125     files2nodefn = [
2126       (files_all, None),
2127       (files_all_opt, None),
2128       (files_mc, lambda node: (node.master_candidate or
2129                                node.name == master_node)),
2130       (files_vm, lambda node: node.vm_capable),
2131       ]
2132
2133     # Build mapping from filename to list of nodes which should have the file
2134     nodefiles = {}
2135     for (files, fn) in files2nodefn:
2136       if fn is None:
2137         filenodes = nodeinfo
2138       else:
2139         filenodes = filter(fn, nodeinfo)
2140       nodefiles.update((filename,
2141                         frozenset(map(operator.attrgetter("name"), filenodes)))
2142                        for filename in files)
2143
2144     assert set(nodefiles) == (files_all | files_all_opt | files_mc | files_vm)
2145
2146     fileinfo = dict((filename, {}) for filename in nodefiles)
2147     ignore_nodes = set()
2148
2149     for node in nodeinfo:
2150       if node.offline:
2151         ignore_nodes.add(node.name)
2152         continue
2153
2154       nresult = all_nvinfo[node.name]
2155
2156       if nresult.fail_msg or not nresult.payload:
2157         node_files = None
2158       else:
2159         node_files = nresult.payload.get(constants.NV_FILELIST, None)
2160
2161       test = not (node_files and isinstance(node_files, dict))
2162       errorif(test, cls.ENODEFILECHECK, node.name,
2163               "Node did not return file checksum data")
2164       if test:
2165         ignore_nodes.add(node.name)
2166         continue
2167
2168       # Build per-checksum mapping from filename to nodes having it
2169       for (filename, checksum) in node_files.items():
2170         assert filename in nodefiles
2171         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2172
2173     for (filename, checksums) in fileinfo.items():
2174       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2175
2176       # Nodes having the file
2177       with_file = frozenset(node_name
2178                             for nodes in fileinfo[filename].values()
2179                             for node_name in nodes) - ignore_nodes
2180
2181       expected_nodes = nodefiles[filename] - ignore_nodes
2182
2183       # Nodes missing file
2184       missing_file = expected_nodes - with_file
2185
2186       if filename in files_all_opt:
2187         # All or no nodes
2188         errorif(missing_file and missing_file != expected_nodes,
2189                 cls.ECLUSTERFILECHECK, None,
2190                 "File %s is optional, but it must exist on all or no"
2191                 " nodes (not found on %s)",
2192                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2193       else:
2194         # Non-optional files
2195         errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2196                 "File %s is missing from node(s) %s", filename,
2197                 utils.CommaJoin(utils.NiceSort(missing_file)))
2198
2199         # Warn if a node has a file it shouldn't
2200         unexpected = with_file - expected_nodes
2201         errorif(unexpected,
2202                 cls.ECLUSTERFILECHECK, None,
2203                 "File %s should not exist on node(s) %s",
2204                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2205
2206       # See if there are multiple versions of the file
2207       test = len(checksums) > 1
2208       if test:
2209         variants = ["variant %s on %s" %
2210                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2211                     for (idx, (checksum, nodes)) in
2212                       enumerate(sorted(checksums.items()))]
2213       else:
2214         variants = []
2215
2216       errorif(test, cls.ECLUSTERFILECHECK, None,
2217               "File %s found with %s different checksums (%s)",
2218               filename, len(checksums), "; ".join(variants))
2219
2220   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2221                       drbd_map):
2222     """Verifies and the node DRBD status.
2223
2224     @type ninfo: L{objects.Node}
2225     @param ninfo: the node to check
2226     @param nresult: the remote results for the node
2227     @param instanceinfo: the dict of instances
2228     @param drbd_helper: the configured DRBD usermode helper
2229     @param drbd_map: the DRBD map as returned by
2230         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2231
2232     """
2233     node = ninfo.name
2234     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2235
2236     if drbd_helper:
2237       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2238       test = (helper_result == None)
2239       _ErrorIf(test, self.ENODEDRBDHELPER, node,
2240                "no drbd usermode helper returned")
2241       if helper_result:
2242         status, payload = helper_result
2243         test = not status
2244         _ErrorIf(test, self.ENODEDRBDHELPER, node,
2245                  "drbd usermode helper check unsuccessful: %s", payload)
2246         test = status and (payload != drbd_helper)
2247         _ErrorIf(test, self.ENODEDRBDHELPER, node,
2248                  "wrong drbd usermode helper: %s", payload)
2249
2250     # compute the DRBD minors
2251     node_drbd = {}
2252     for minor, instance in drbd_map[node].items():
2253       test = instance not in instanceinfo
2254       _ErrorIf(test, self.ECLUSTERCFG, None,
2255                "ghost instance '%s' in temporary DRBD map", instance)
2256         # ghost instance should not be running, but otherwise we
2257         # don't give double warnings (both ghost instance and
2258         # unallocated minor in use)
2259       if test:
2260         node_drbd[minor] = (instance, False)
2261       else:
2262         instance = instanceinfo[instance]
2263         node_drbd[minor] = (instance.name, instance.admin_up)
2264
2265     # and now check them
2266     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2267     test = not isinstance(used_minors, (tuple, list))
2268     _ErrorIf(test, self.ENODEDRBD, node,
2269              "cannot parse drbd status file: %s", str(used_minors))
2270     if test:
2271       # we cannot check drbd status
2272       return
2273
2274     for minor, (iname, must_exist) in node_drbd.items():
2275       test = minor not in used_minors and must_exist
2276       _ErrorIf(test, self.ENODEDRBD, node,
2277                "drbd minor %d of instance %s is not active", minor, iname)
2278     for minor in used_minors:
2279       test = minor not in node_drbd
2280       _ErrorIf(test, self.ENODEDRBD, node,
2281                "unallocated drbd minor %d is in use", minor)
2282
2283   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2284     """Builds the node OS structures.
2285
2286     @type ninfo: L{objects.Node}
2287     @param ninfo: the node to check
2288     @param nresult: the remote results for the node
2289     @param nimg: the node image object
2290
2291     """
2292     node = ninfo.name
2293     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2294
2295     remote_os = nresult.get(constants.NV_OSLIST, None)
2296     test = (not isinstance(remote_os, list) or
2297             not compat.all(isinstance(v, list) and len(v) == 7
2298                            for v in remote_os))
2299
2300     _ErrorIf(test, self.ENODEOS, node,
2301              "node hasn't returned valid OS data")
2302
2303     nimg.os_fail = test
2304
2305     if test:
2306       return
2307
2308     os_dict = {}
2309
2310     for (name, os_path, status, diagnose,
2311          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2312
2313       if name not in os_dict:
2314         os_dict[name] = []
2315
2316       # parameters is a list of lists instead of list of tuples due to
2317       # JSON lacking a real tuple type, fix it:
2318       parameters = [tuple(v) for v in parameters]
2319       os_dict[name].append((os_path, status, diagnose,
2320                             set(variants), set(parameters), set(api_ver)))
2321
2322     nimg.oslist = os_dict
2323
2324   def _VerifyNodeOS(self, ninfo, nimg, base):
2325     """Verifies the node OS list.
2326
2327     @type ninfo: L{objects.Node}
2328     @param ninfo: the node to check
2329     @param nimg: the node image object
2330     @param base: the 'template' node we match against (e.g. from the master)
2331
2332     """
2333     node = ninfo.name
2334     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2335
2336     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2337
2338     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2339     for os_name, os_data in nimg.oslist.items():
2340       assert os_data, "Empty OS status for OS %s?!" % os_name
2341       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2342       _ErrorIf(not f_status, self.ENODEOS, node,
2343                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2344       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2345                "OS '%s' has multiple entries (first one shadows the rest): %s",
2346                os_name, utils.CommaJoin([v[0] for v in os_data]))
2347       # comparisons with the 'base' image
2348       test = os_name not in base.oslist
2349       _ErrorIf(test, self.ENODEOS, node,
2350                "Extra OS %s not present on reference node (%s)",
2351                os_name, base.name)
2352       if test:
2353         continue
2354       assert base.oslist[os_name], "Base node has empty OS status?"
2355       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2356       if not b_status:
2357         # base OS is invalid, skipping
2358         continue
2359       for kind, a, b in [("API version", f_api, b_api),
2360                          ("variants list", f_var, b_var),
2361                          ("parameters", beautify_params(f_param),
2362                           beautify_params(b_param))]:
2363         _ErrorIf(a != b, self.ENODEOS, node,
2364                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2365                  kind, os_name, base.name,
2366                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2367
2368     # check any missing OSes
2369     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2370     _ErrorIf(missing, self.ENODEOS, node,
2371              "OSes present on reference node %s but missing on this node: %s",
2372              base.name, utils.CommaJoin(missing))
2373
2374   def _VerifyOob(self, ninfo, nresult):
2375     """Verifies out of band functionality of a node.
2376
2377     @type ninfo: L{objects.Node}
2378     @param ninfo: the node to check
2379     @param nresult: the remote results for the node
2380
2381     """
2382     node = ninfo.name
2383     # We just have to verify the paths on master and/or master candidates
2384     # as the oob helper is invoked on the master
2385     if ((ninfo.master_candidate or ninfo.master_capable) and
2386         constants.NV_OOB_PATHS in nresult):
2387       for path_result in nresult[constants.NV_OOB_PATHS]:
2388         self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2389
2390   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2391     """Verifies and updates the node volume data.
2392
2393     This function will update a L{NodeImage}'s internal structures
2394     with data from the remote call.
2395
2396     @type ninfo: L{objects.Node}
2397     @param ninfo: the node to check
2398     @param nresult: the remote results for the node
2399     @param nimg: the node image object
2400     @param vg_name: the configured VG name
2401
2402     """
2403     node = ninfo.name
2404     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2405
2406     nimg.lvm_fail = True
2407     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2408     if vg_name is None:
2409       pass
2410     elif isinstance(lvdata, basestring):
2411       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2412                utils.SafeEncode(lvdata))
2413     elif not isinstance(lvdata, dict):
2414       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2415     else:
2416       nimg.volumes = lvdata
2417       nimg.lvm_fail = False
2418
2419   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2420     """Verifies and updates the node instance list.
2421
2422     If the listing was successful, then updates this node's instance
2423     list. Otherwise, it marks the RPC call as failed for the instance
2424     list key.
2425
2426     @type ninfo: L{objects.Node}
2427     @param ninfo: the node to check
2428     @param nresult: the remote results for the node
2429     @param nimg: the node image object
2430
2431     """
2432     idata = nresult.get(constants.NV_INSTANCELIST, None)
2433     test = not isinstance(idata, list)
2434     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2435                   " (instancelist): %s", utils.SafeEncode(str(idata)))
2436     if test:
2437       nimg.hyp_fail = True
2438     else:
2439       nimg.instances = idata
2440
2441   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2442     """Verifies and computes a node information map
2443
2444     @type ninfo: L{objects.Node}
2445     @param ninfo: the node to check
2446     @param nresult: the remote results for the node
2447     @param nimg: the node image object
2448     @param vg_name: the configured VG name
2449
2450     """
2451     node = ninfo.name
2452     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2453
2454     # try to read free memory (from the hypervisor)
2455     hv_info = nresult.get(constants.NV_HVINFO, None)
2456     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2457     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2458     if not test:
2459       try:
2460         nimg.mfree = int(hv_info["memory_free"])
2461       except (ValueError, TypeError):
2462         _ErrorIf(True, self.ENODERPC, node,
2463                  "node returned invalid nodeinfo, check hypervisor")
2464
2465     # FIXME: devise a free space model for file based instances as well
2466     if vg_name is not None:
2467       test = (constants.NV_VGLIST not in nresult or
2468               vg_name not in nresult[constants.NV_VGLIST])
2469       _ErrorIf(test, self.ENODELVM, node,
2470                "node didn't return data for the volume group '%s'"
2471                " - it is either missing or broken", vg_name)
2472       if not test:
2473         try:
2474           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2475         except (ValueError, TypeError):
2476           _ErrorIf(True, self.ENODERPC, node,
2477                    "node returned invalid LVM info, check LVM status")
2478
2479   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2480     """Gets per-disk status information for all instances.
2481
2482     @type nodelist: list of strings
2483     @param nodelist: Node names
2484     @type node_image: dict of (name, L{objects.Node})
2485     @param node_image: Node objects
2486     @type instanceinfo: dict of (name, L{objects.Instance})
2487     @param instanceinfo: Instance objects
2488     @rtype: {instance: {node: [(succes, payload)]}}
2489     @return: a dictionary of per-instance dictionaries with nodes as
2490         keys and disk information as values; the disk information is a
2491         list of tuples (success, payload)
2492
2493     """
2494     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2495
2496     node_disks = {}
2497     node_disks_devonly = {}
2498     diskless_instances = set()
2499     diskless = constants.DT_DISKLESS
2500
2501     for nname in nodelist:
2502       node_instances = list(itertools.chain(node_image[nname].pinst,
2503                                             node_image[nname].sinst))
2504       diskless_instances.update(inst for inst in node_instances
2505                                 if instanceinfo[inst].disk_template == diskless)
2506       disks = [(inst, disk)
2507                for inst in node_instances
2508                for disk in instanceinfo[inst].disks]
2509
2510       if not disks:
2511         # No need to collect data
2512         continue
2513
2514       node_disks[nname] = disks
2515
2516       # Creating copies as SetDiskID below will modify the objects and that can
2517       # lead to incorrect data returned from nodes
2518       devonly = [dev.Copy() for (_, dev) in disks]
2519
2520       for dev in devonly:
2521         self.cfg.SetDiskID(dev, nname)
2522
2523       node_disks_devonly[nname] = devonly
2524
2525     assert len(node_disks) == len(node_disks_devonly)
2526
2527     # Collect data from all nodes with disks
2528     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2529                                                           node_disks_devonly)
2530
2531     assert len(result) == len(node_disks)
2532
2533     instdisk = {}
2534
2535     for (nname, nres) in result.items():
2536       disks = node_disks[nname]
2537
2538       if nres.offline:
2539         # No data from this node
2540         data = len(disks) * [(False, "node offline")]
2541       else:
2542         msg = nres.fail_msg
2543         _ErrorIf(msg, self.ENODERPC, nname,
2544                  "while getting disk information: %s", msg)
2545         if msg:
2546           # No data from this node
2547           data = len(disks) * [(False, msg)]
2548         else:
2549           data = []
2550           for idx, i in enumerate(nres.payload):
2551             if isinstance(i, (tuple, list)) and len(i) == 2:
2552               data.append(i)
2553             else:
2554               logging.warning("Invalid result from node %s, entry %d: %s",
2555                               nname, idx, i)
2556               data.append((False, "Invalid result from the remote node"))
2557
2558       for ((inst, _), status) in zip(disks, data):
2559         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2560
2561     # Add empty entries for diskless instances.
2562     for inst in diskless_instances:
2563       assert inst not in instdisk
2564       instdisk[inst] = {}
2565
2566     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2567                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2568                       compat.all(isinstance(s, (tuple, list)) and
2569                                  len(s) == 2 for s in statuses)
2570                       for inst, nnames in instdisk.items()
2571                       for nname, statuses in nnames.items())
2572     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2573
2574     return instdisk
2575
2576   @staticmethod
2577   def _SshNodeSelector(group_uuid, all_nodes):
2578     """Create endless iterators for all potential SSH check hosts.
2579
2580     """
2581     nodes = [node for node in all_nodes
2582              if (node.group != group_uuid and
2583                  not node.offline)]
2584     keyfunc = operator.attrgetter("group")
2585
2586     return map(itertools.cycle,
2587                [sorted(map(operator.attrgetter("name"), names))
2588                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2589                                                   keyfunc)])
2590
2591   @classmethod
2592   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2593     """Choose which nodes should talk to which other nodes.
2594
2595     We will make nodes contact all nodes in their group, and one node from
2596     every other group.
2597
2598     @warning: This algorithm has a known issue if one node group is much
2599       smaller than others (e.g. just one node). In such a case all other
2600       nodes will talk to the single node.
2601
2602     """
2603     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2604     sel = cls._SshNodeSelector(group_uuid, all_nodes)
2605
2606     return (online_nodes,
2607             dict((name, sorted([i.next() for i in sel]))
2608                  for name in online_nodes))
2609
2610   def BuildHooksEnv(self):
2611     """Build hooks env.
2612
2613     Cluster-Verify hooks just ran in the post phase and their failure makes
2614     the output be logged in the verify output and the verification to fail.
2615
2616     """
2617     env = {
2618       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2619       }
2620
2621     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2622                for node in self.my_node_info.values())
2623
2624     return env
2625
2626   def BuildHooksNodes(self):
2627     """Build hooks nodes.
2628
2629     """
2630     return ([], self.my_node_names)
2631
2632   def Exec(self, feedback_fn):
2633     """Verify integrity of the node group, performing various test on nodes.
2634
2635     """
2636     # This method has too many local variables. pylint: disable=R0914
2637     feedback_fn("* Verifying group '%s'" % self.group_info.name)
2638
2639     if not self.my_node_names:
2640       # empty node group
2641       feedback_fn("* Empty node group, skipping verification")
2642       return True
2643
2644     self.bad = False
2645     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2646     verbose = self.op.verbose
2647     self._feedback_fn = feedback_fn
2648
2649     vg_name = self.cfg.GetVGName()
2650     drbd_helper = self.cfg.GetDRBDHelper()
2651     cluster = self.cfg.GetClusterInfo()
2652     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2653     hypervisors = cluster.enabled_hypervisors
2654     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2655
2656     i_non_redundant = [] # Non redundant instances
2657     i_non_a_balanced = [] # Non auto-balanced instances
2658     n_offline = 0 # Count of offline nodes
2659     n_drained = 0 # Count of nodes being drained
2660     node_vol_should = {}
2661
2662     # FIXME: verify OS list
2663
2664     # File verification
2665     filemap = _ComputeAncillaryFiles(cluster, False)
2666
2667     # do local checksums
2668     master_node = self.master_node = self.cfg.GetMasterNode()
2669     master_ip = self.cfg.GetMasterIP()
2670
2671     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2672
2673     node_verify_param = {
2674       constants.NV_FILELIST:
2675         utils.UniqueSequence(filename
2676                              for files in filemap
2677                              for filename in files),
2678       constants.NV_NODELIST:
2679         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2680                                   self.all_node_info.values()),
2681       constants.NV_HYPERVISOR: hypervisors,
2682       constants.NV_HVPARAMS:
2683         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2684       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2685                                  for node in node_data_list
2686                                  if not node.offline],
2687       constants.NV_INSTANCELIST: hypervisors,
2688       constants.NV_VERSION: None,
2689       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2690       constants.NV_NODESETUP: None,
2691       constants.NV_TIME: None,
2692       constants.NV_MASTERIP: (master_node, master_ip),
2693       constants.NV_OSLIST: None,
2694       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2695       }
2696
2697     if vg_name is not None:
2698       node_verify_param[constants.NV_VGLIST] = None
2699       node_verify_param[constants.NV_LVLIST] = vg_name
2700       node_verify_param[constants.NV_PVLIST] = [vg_name]
2701       node_verify_param[constants.NV_DRBDLIST] = None
2702
2703     if drbd_helper:
2704       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2705
2706     # bridge checks
2707     # FIXME: this needs to be changed per node-group, not cluster-wide
2708     bridges = set()
2709     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2710     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2711       bridges.add(default_nicpp[constants.NIC_LINK])
2712     for instance in self.my_inst_info.values():
2713       for nic in instance.nics:
2714         full_nic = cluster.SimpleFillNIC(nic.nicparams)
2715         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2716           bridges.add(full_nic[constants.NIC_LINK])
2717
2718     if bridges:
2719       node_verify_param[constants.NV_BRIDGES] = list(bridges)
2720
2721     # Build our expected cluster state
2722     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2723                                                  name=node.name,
2724                                                  vm_capable=node.vm_capable))
2725                       for node in node_data_list)
2726
2727     # Gather OOB paths
2728     oob_paths = []
2729     for node in self.all_node_info.values():
2730       path = _SupportsOob(self.cfg, node)
2731       if path and path not in oob_paths:
2732         oob_paths.append(path)
2733
2734     if oob_paths:
2735       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2736
2737     for instance in self.my_inst_names:
2738       inst_config = self.my_inst_info[instance]
2739
2740       for nname in inst_config.all_nodes:
2741         if nname not in node_image:
2742           gnode = self.NodeImage(name=nname)
2743           gnode.ghost = (nname not in self.all_node_info)
2744           node_image[nname] = gnode
2745
2746       inst_config.MapLVsByNode(node_vol_should)
2747
2748       pnode = inst_config.primary_node
2749       node_image[pnode].pinst.append(instance)
2750
2751       for snode in inst_config.secondary_nodes:
2752         nimg = node_image[snode]
2753         nimg.sinst.append(instance)
2754         if pnode not in nimg.sbp:
2755           nimg.sbp[pnode] = []
2756         nimg.sbp[pnode].append(instance)
2757
2758     # At this point, we have the in-memory data structures complete,
2759     # except for the runtime information, which we'll gather next
2760
2761     # Due to the way our RPC system works, exact response times cannot be
2762     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2763     # time before and after executing the request, we can at least have a time
2764     # window.
2765     nvinfo_starttime = time.time()
2766     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2767                                            node_verify_param,
2768                                            self.cfg.GetClusterName())
2769     nvinfo_endtime = time.time()
2770
2771     if self.extra_lv_nodes and vg_name is not None:
2772       extra_lv_nvinfo = \
2773           self.rpc.call_node_verify(self.extra_lv_nodes,
2774                                     {constants.NV_LVLIST: vg_name},
2775                                     self.cfg.GetClusterName())
2776     else:
2777       extra_lv_nvinfo = {}
2778
2779     all_drbd_map = self.cfg.ComputeDRBDMap()
2780
2781     feedback_fn("* Gathering disk information (%s nodes)" %
2782                 len(self.my_node_names))
2783     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2784                                      self.my_inst_info)
2785
2786     feedback_fn("* Verifying configuration file consistency")
2787
2788     # If not all nodes are being checked, we need to make sure the master node
2789     # and a non-checked vm_capable node are in the list.
2790     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2791     if absent_nodes:
2792       vf_nvinfo = all_nvinfo.copy()
2793       vf_node_info = list(self.my_node_info.values())
2794       additional_nodes = []
2795       if master_node not in self.my_node_info:
2796         additional_nodes.append(master_node)
2797         vf_node_info.append(self.all_node_info[master_node])
2798       # Add the first vm_capable node we find which is not included
2799       for node in absent_nodes:
2800         nodeinfo = self.all_node_info[node]
2801         if nodeinfo.vm_capable and not nodeinfo.offline:
2802           additional_nodes.append(node)
2803           vf_node_info.append(self.all_node_info[node])
2804           break
2805       key = constants.NV_FILELIST
2806       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2807                                                  {key: node_verify_param[key]},
2808                                                  self.cfg.GetClusterName()))
2809     else:
2810       vf_nvinfo = all_nvinfo
2811       vf_node_info = self.my_node_info.values()
2812
2813     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2814
2815     feedback_fn("* Verifying node status")
2816
2817     refos_img = None
2818
2819     for node_i in node_data_list:
2820       node = node_i.name
2821       nimg = node_image[node]
2822
2823       if node_i.offline:
2824         if verbose:
2825           feedback_fn("* Skipping offline node %s" % (node,))
2826         n_offline += 1
2827         continue
2828
2829       if node == master_node:
2830         ntype = "master"
2831       elif node_i.master_candidate:
2832         ntype = "master candidate"
2833       elif node_i.drained:
2834         ntype = "drained"
2835         n_drained += 1
2836       else:
2837         ntype = "regular"
2838       if verbose:
2839         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2840
2841       msg = all_nvinfo[node].fail_msg
2842       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2843       if msg:
2844         nimg.rpc_fail = True
2845         continue
2846
2847       nresult = all_nvinfo[node].payload
2848
2849       nimg.call_ok = self._VerifyNode(node_i, nresult)
2850       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2851       self._VerifyNodeNetwork(node_i, nresult)
2852       self._VerifyOob(node_i, nresult)
2853
2854       if nimg.vm_capable:
2855         self._VerifyNodeLVM(node_i, nresult, vg_name)
2856         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2857                              all_drbd_map)
2858
2859         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2860         self._UpdateNodeInstances(node_i, nresult, nimg)
2861         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2862         self._UpdateNodeOS(node_i, nresult, nimg)
2863
2864         if not nimg.os_fail:
2865           if refos_img is None:
2866             refos_img = nimg
2867           self._VerifyNodeOS(node_i, nimg, refos_img)
2868         self._VerifyNodeBridges(node_i, nresult, bridges)
2869
2870         # Check whether all running instancies are primary for the node. (This
2871         # can no longer be done from _VerifyInstance below, since some of the
2872         # wrong instances could be from other node groups.)
2873         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2874
2875         for inst in non_primary_inst:
2876           test = inst in self.all_inst_info
2877           _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2878                    "instance should not run on node %s", node_i.name)
2879           _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2880                    "node is running unknown instance %s", inst)
2881
2882     for node, result in extra_lv_nvinfo.items():
2883       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2884                               node_image[node], vg_name)
2885
2886     feedback_fn("* Verifying instance status")
2887     for instance in self.my_inst_names:
2888       if verbose:
2889         feedback_fn("* Verifying instance %s" % instance)
2890       inst_config = self.my_inst_info[instance]
2891       self._VerifyInstance(instance, inst_config, node_image,
2892                            instdisk[instance])
2893       inst_nodes_offline = []
2894
2895       pnode = inst_config.primary_node
2896       pnode_img = node_image[pnode]
2897       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2898                self.ENODERPC, pnode, "instance %s, connection to"
2899                " primary node failed", instance)
2900
2901       _ErrorIf(inst_config.admin_up and pnode_img.offline,
2902                self.EINSTANCEBADNODE, instance,
2903                "instance is marked as running and lives on offline node %s",
2904                inst_config.primary_node)
2905
2906       # If the instance is non-redundant we cannot survive losing its primary
2907       # node, so we are not N+1 compliant. On the other hand we have no disk
2908       # templates with more than one secondary so that situation is not well
2909       # supported either.
2910       # FIXME: does not support file-backed instances
2911       if not inst_config.secondary_nodes:
2912         i_non_redundant.append(instance)
2913
2914       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2915                instance, "instance has multiple secondary nodes: %s",
2916                utils.CommaJoin(inst_config.secondary_nodes),
2917                code=self.ETYPE_WARNING)
2918
2919       if inst_config.disk_template in constants.DTS_INT_MIRROR:
2920         pnode = inst_config.primary_node
2921         instance_nodes = utils.NiceSort(inst_config.all_nodes)
2922         instance_groups = {}
2923
2924         for node in instance_nodes:
2925           instance_groups.setdefault(self.all_node_info[node].group,
2926                                      []).append(node)
2927
2928         pretty_list = [
2929           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2930           # Sort so that we always list the primary node first.
2931           for group, nodes in sorted(instance_groups.items(),
2932                                      key=lambda (_, nodes): pnode in nodes,
2933                                      reverse=True)]
2934
2935         self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2936                       instance, "instance has primary and secondary nodes in"
2937                       " different groups: %s", utils.CommaJoin(pretty_list),
2938                       code=self.ETYPE_WARNING)
2939
2940       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2941         i_non_a_balanced.append(instance)
2942
2943       for snode in inst_config.secondary_nodes:
2944         s_img = node_image[snode]
2945         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2946                  "instance %s, connection to secondary node failed", instance)
2947
2948         if s_img.offline:
2949           inst_nodes_offline.append(snode)
2950
2951       # warn that the instance lives on offline nodes
2952       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2953                "instance has offline secondary node(s) %s",
2954                utils.CommaJoin(inst_nodes_offline))
2955       # ... or ghost/non-vm_capable nodes
2956       for node in inst_config.all_nodes:
2957         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2958                  "instance lives on ghost node %s", node)
2959         _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2960                  instance, "instance lives on non-vm_capable node %s", node)
2961
2962     feedback_fn("* Verifying orphan volumes")
2963     reserved = utils.FieldSet(*cluster.reserved_lvs)
2964
2965     # We will get spurious "unknown volume" warnings if any node of this group
2966     # is secondary for an instance whose primary is in another group. To avoid
2967     # them, we find these instances and add their volumes to node_vol_should.
2968     for inst in self.all_inst_info.values():
2969       for secondary in inst.secondary_nodes:
2970         if (secondary in self.my_node_info
2971             and inst.name not in self.my_inst_info):
2972           inst.MapLVsByNode(node_vol_should)
2973           break
2974
2975     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2976
2977     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2978       feedback_fn("* Verifying N+1 Memory redundancy")
2979       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2980
2981     feedback_fn("* Other Notes")
2982     if i_non_redundant:
2983       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2984                   % len(i_non_redundant))
2985
2986     if i_non_a_balanced:
2987       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2988                   % len(i_non_a_balanced))
2989
2990     if n_offline:
2991       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2992
2993     if n_drained:
2994       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2995
2996     return not self.bad
2997
2998   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2999     """Analyze the post-hooks' result
3000
3001     This method analyses the hook result, handles it, and sends some
3002     nicely-formatted feedback back to the user.
3003
3004     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3005         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3006     @param hooks_results: the results of the multi-node hooks rpc call
3007     @param feedback_fn: function used send feedback back to the caller
3008     @param lu_result: previous Exec result
3009     @return: the new Exec result, based on the previous result
3010         and hook results
3011
3012     """
3013     # We only really run POST phase hooks, only for non-empty groups,
3014     # and are only interested in their results
3015     if not self.my_node_names:
3016       # empty node group
3017       pass
3018     elif phase == constants.HOOKS_PHASE_POST:
3019       # Used to change hooks' output to proper indentation
3020       feedback_fn("* Hooks Results")
3021       assert hooks_results, "invalid result from hooks"
3022
3023       for node_name in hooks_results:
3024         res = hooks_results[node_name]
3025         msg = res.fail_msg
3026         test = msg and not res.offline
3027         self._ErrorIf(test, self.ENODEHOOKS, node_name,
3028                       "Communication failure in hooks execution: %s", msg)
3029         if res.offline or msg:
3030           # No need to investigate payload if node is offline or gave
3031           # an error.
3032           continue
3033         for script, hkr, output in res.payload:
3034           test = hkr == constants.HKR_FAIL
3035           self._ErrorIf(test, self.ENODEHOOKS, node_name,
3036                         "Script %s failed, output:", script)
3037           if test:
3038             output = self._HOOKS_INDENT_RE.sub("      ", output)
3039             feedback_fn("%s" % output)
3040             lu_result = False
3041
3042     return lu_result
3043
3044
3045 class LUClusterVerifyDisks(NoHooksLU):
3046   """Verifies the cluster disks status.
3047
3048   """
3049   REQ_BGL = False
3050
3051   def ExpandNames(self):
3052     self.share_locks = _ShareAll()
3053     self.needed_locks = {
3054       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3055       }
3056
3057   def Exec(self, feedback_fn):
3058     group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3059
3060     # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3061     return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3062                            for group in group_names])
3063
3064
3065 class LUGroupVerifyDisks(NoHooksLU):
3066   """Verifies the status of all disks in a node group.
3067
3068   """
3069   REQ_BGL = False
3070
3071   def ExpandNames(self):
3072     # Raises errors.OpPrereqError on its own if group can't be found
3073     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3074
3075     self.share_locks = _ShareAll()
3076     self.needed_locks = {
3077       locking.LEVEL_INSTANCE: [],
3078       locking.LEVEL_NODEGROUP: [],
3079       locking.LEVEL_NODE: [],
3080       }
3081
3082   def DeclareLocks(self, level):
3083     if level == locking.LEVEL_INSTANCE:
3084       assert not self.needed_locks[locking.LEVEL_INSTANCE]
3085
3086       # Lock instances optimistically, needs verification once node and group
3087       # locks have been acquired
3088       self.needed_locks[locking.LEVEL_INSTANCE] = \
3089         self.cfg.GetNodeGroupInstances(self.group_uuid)
3090
3091     elif level == locking.LEVEL_NODEGROUP:
3092       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3093
3094       self.needed_locks[locking.LEVEL_NODEGROUP] = \
3095         set([self.group_uuid] +
3096             # Lock all groups used by instances optimistically; this requires
3097             # going via the node before it's locked, requiring verification
3098             # later on
3099             [group_uuid
3100              for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3101              for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3102
3103     elif level == locking.LEVEL_NODE:
3104       # This will only lock the nodes in the group to be verified which contain
3105       # actual instances
3106       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3107       self._LockInstancesNodes()
3108
3109       # Lock all nodes in group to be verified
3110       assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3111       member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3112       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3113
3114   def CheckPrereq(self):
3115     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3116     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3117     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3118
3119     assert self.group_uuid in owned_groups
3120
3121     # Check if locked instances are still correct
3122     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3123
3124     # Get instance information
3125     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3126
3127     # Check if node groups for locked instances are still correct
3128     for (instance_name, inst) in self.instances.items():
3129       assert owned_nodes.issuperset(inst.all_nodes), \
3130         "Instance %s's nodes changed while we kept the lock" % instance_name
3131
3132       inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3133                                              owned_groups)
3134
3135       assert self.group_uuid in inst_groups, \
3136         "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3137
3138   def Exec(self, feedback_fn):
3139     """Verify integrity of cluster disks.
3140
3141     @rtype: tuple of three items
3142     @return: a tuple of (dict of node-to-node_error, list of instances
3143         which need activate-disks, dict of instance: (node, volume) for
3144         missing volumes
3145
3146     """
3147     res_nodes = {}
3148     res_instances = set()
3149     res_missing = {}
3150
3151     nv_dict = _MapInstanceDisksToNodes([inst
3152                                         for inst in self.instances.values()
3153                                         if inst.admin_up])
3154
3155     if nv_dict:
3156       nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3157                              set(self.cfg.GetVmCapableNodeList()))
3158
3159       node_lvs = self.rpc.call_lv_list(nodes, [])
3160
3161       for (node, node_res) in node_lvs.items():
3162         if node_res.offline:
3163           continue
3164
3165         msg = node_res.fail_msg
3166         if msg:
3167           logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3168           res_nodes[node] = msg
3169           continue
3170
3171         for lv_name, (_, _, lv_online) in node_res.payload.items():
3172           inst = nv_dict.pop((node, lv_name), None)
3173           if not (lv_online or inst is None):
3174             res_instances.add(inst)
3175
3176       # any leftover items in nv_dict are missing LVs, let's arrange the data
3177       # better
3178       for key, inst in nv_dict.iteritems():
3179         res_missing.setdefault(inst, []).append(list(key))
3180
3181     return (res_nodes, list(res_instances), res_missing)
3182
3183
3184 class LUClusterRepairDiskSizes(NoHooksLU):
3185   """Verifies the cluster disks sizes.
3186
3187   """
3188   REQ_BGL = False
3189
3190   def ExpandNames(self):
3191     if self.op.instances:
3192       self.wanted_names = _GetWantedInstances(self, self.op.instances)
3193       self.needed_locks = {
3194         locking.LEVEL_NODE: [],
3195         locking.LEVEL_INSTANCE: self.wanted_names,
3196         }
3197       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3198     else:
3199       self.wanted_names = None
3200       self.needed_locks = {
3201         locking.LEVEL_NODE: locking.ALL_SET,
3202         locking.LEVEL_INSTANCE: locking.ALL_SET,
3203         }
3204     self.share_locks = {
3205       locking.LEVEL_NODE: 1,
3206       locking.LEVEL_INSTANCE: 0,
3207       }
3208
3209   def DeclareLocks(self, level):
3210     if level == locking.LEVEL_NODE and self.wanted_names is not None:
3211       self._LockInstancesNodes(primary_only=True)
3212
3213   def CheckPrereq(self):
3214     """Check prerequisites.
3215
3216     This only checks the optional instance list against the existing names.
3217
3218     """
3219     if self.wanted_names is None:
3220       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3221
3222     self.wanted_instances = \
3223         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3224
3225   def _EnsureChildSizes(self, disk):
3226     """Ensure children of the disk have the needed disk size.
3227
3228     This is valid mainly for DRBD8 and fixes an issue where the
3229     children have smaller disk size.
3230
3231     @param disk: an L{ganeti.objects.Disk} object
3232
3233     """
3234     if disk.dev_type == constants.LD_DRBD8:
3235       assert disk.children, "Empty children for DRBD8?"
3236       fchild = disk.children[0]
3237       mismatch = fchild.size < disk.size
3238       if mismatch:
3239         self.LogInfo("Child disk has size %d, parent %d, fixing",
3240                      fchild.size, disk.size)
3241         fchild.size = disk.size
3242
3243       # and we recurse on this child only, not on the metadev
3244       return self._EnsureChildSizes(fchild) or mismatch
3245     else:
3246       return False
3247
3248   def Exec(self, feedback_fn):
3249     """Verify the size of cluster disks.
3250
3251     """
3252     # TODO: check child disks too
3253     # TODO: check differences in size between primary/secondary nodes
3254     per_node_disks = {}
3255     for instance in self.wanted_instances:
3256       pnode = instance.primary_node
3257       if pnode not in per_node_disks:
3258         per_node_disks[pnode] = []
3259       for idx, disk in enumerate(instance.disks):
3260         per_node_disks[pnode].append((instance, idx, disk))
3261
3262     changed = []
3263     for node, dskl in per_node_disks.items():
3264       newl = [v[2].Copy() for v in dskl]
3265       for dsk in newl:
3266         self.cfg.SetDiskID(dsk, node)
3267       result = self.rpc.call_blockdev_getsize(node, newl)
3268       if result.fail_msg:
3269         self.LogWarning("Failure in blockdev_getsize call to node"
3270                         " %s, ignoring", node)
3271         continue
3272       if len(result.payload) != len(dskl):
3273         logging.warning("Invalid result from node %s: len(dksl)=%d,"
3274                         " result.payload=%s", node, len(dskl), result.payload)
3275         self.LogWarning("Invalid result from node %s, ignoring node results",
3276                         node)
3277         continue
3278       for ((instance, idx, disk), size) in zip(dskl, result.payload):
3279         if size is None:
3280           self.LogWarning("Disk %d of instance %s did not return size"
3281                           " information, ignoring", idx, instance.name)
3282           continue
3283         if not isinstance(size, (int, long)):
3284           self.LogWarning("Disk %d of instance %s did not return valid"
3285                           " size information, ignoring", idx, instance.name)
3286           continue
3287         size = size >> 20
3288         if size != disk.size:
3289           self.LogInfo("Disk %d of instance %s has mismatched size,"
3290                        " correcting: recorded %d, actual %d", idx,
3291                        instance.name, disk.size, size)
3292           disk.size = size
3293           self.cfg.Update(instance, feedback_fn)
3294           changed.append((instance.name, idx, size))
3295         if self._EnsureChildSizes(disk):
3296           self.cfg.Update(instance, feedback_fn)
3297           changed.append((instance.name, idx, disk.size))
3298     return changed
3299
3300
3301 class LUClusterRename(LogicalUnit):
3302   """Rename the cluster.
3303
3304   """
3305   HPATH = "cluster-rename"
3306   HTYPE = constants.HTYPE_CLUSTER
3307
3308   def BuildHooksEnv(self):
3309     """Build hooks env.
3310
3311     """
3312     return {
3313       "OP_TARGET": self.cfg.GetClusterName(),
3314       "NEW_NAME": self.op.name,
3315       }
3316
3317   def BuildHooksNodes(self):
3318     """Build hooks nodes.
3319
3320     """
3321     return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3322
3323   def CheckPrereq(self):
3324     """Verify that the passed name is a valid one.
3325
3326     """
3327     hostname = netutils.GetHostname(name=self.op.name,
3328                                     family=self.cfg.GetPrimaryIPFamily())
3329
3330     new_name = hostname.name
3331     self.ip = new_ip = hostname.ip
3332     old_name = self.cfg.GetClusterName()
3333     old_ip = self.cfg.GetMasterIP()
3334     if new_name == old_name and new_ip == old_ip:
3335       raise errors.OpPrereqError("Neither the name nor the IP address of the"
3336                                  " cluster has changed",
3337                                  errors.ECODE_INVAL)
3338     if new_ip != old_ip:
3339       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3340         raise errors.OpPrereqError("The given cluster IP address (%s) is"
3341                                    " reachable on the network" %
3342                                    new_ip, errors.ECODE_NOTUNIQUE)
3343
3344     self.op.name = new_name
3345
3346   def Exec(self, feedback_fn):
3347     """Rename the cluster.
3348
3349     """
3350     clustername = self.op.name
3351     ip = self.ip
3352
3353     # shutdown the master IP
3354     master = self.cfg.GetMasterNode()
3355     result = self.rpc.call_node_stop_master(master, False)
3356     result.Raise("Could not disable the master role")
3357
3358     try:
3359       cluster = self.cfg.GetClusterInfo()
3360       cluster.cluster_name = clustername
3361       cluster.master_ip = ip
3362       self.cfg.Update(cluster, feedback_fn)
3363
3364       # update the known hosts file
3365       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3366       node_list = self.cfg.GetOnlineNodeList()
3367       try:
3368         node_list.remove(master)
3369       except ValueError:
3370         pass
3371       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3372     finally:
3373       result = self.rpc.call_node_start_master(master, False, False)
3374       msg = result.fail_msg
3375       if msg:
3376         self.LogWarning("Could not re-enable the master role on"
3377                         " the master, please restart manually: %s", msg)
3378
3379     return clustername
3380
3381
3382 class LUClusterSetParams(LogicalUnit):
3383   """Change the parameters of the cluster.
3384
3385   """
3386   HPATH = "cluster-modify"
3387   HTYPE = constants.HTYPE_CLUSTER
3388   REQ_BGL = False
3389
3390   def CheckArguments(self):
3391     """Check parameters
3392
3393     """
3394     if self.op.uid_pool:
3395       uidpool.CheckUidPool(self.op.uid_pool)
3396
3397     if self.op.add_uids:
3398       uidpool.CheckUidPool(self.op.add_uids)
3399
3400     if self.op.remove_uids:
3401       uidpool.CheckUidPool(self.op.remove_uids)
3402
3403   def ExpandNames(self):
3404     # FIXME: in the future maybe other cluster params won't require checking on
3405     # all nodes to be modified.
3406     self.needed_locks = {
3407       locking.LEVEL_NODE: locking.ALL_SET,
3408     }
3409     self.share_locks[locking.LEVEL_NODE] = 1
3410
3411   def BuildHooksEnv(self):
3412     """Build hooks env.
3413
3414     """
3415     return {
3416       "OP_TARGET": self.cfg.GetClusterName(),
3417       "NEW_VG_NAME": self.op.vg_name,
3418       }
3419
3420   def BuildHooksNodes(self):
3421     """Build hooks nodes.
3422
3423     """
3424     mn = self.cfg.GetMasterNode()
3425     return ([mn], [mn])
3426
3427   def CheckPrereq(self):
3428     """Check prerequisites.
3429
3430     This checks whether the given params don't conflict and
3431     if the given volume group is valid.
3432
3433     """
3434     if self.op.vg_name is not None and not self.op.vg_name:
3435       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3436         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3437                                    " instances exist", errors.ECODE_INVAL)
3438
3439     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3440       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3441         raise errors.OpPrereqError("Cannot disable drbd helper while"
3442                                    " drbd-based instances exist",
3443                                    errors.ECODE_INVAL)
3444
3445     node_list = self.owned_locks(locking.LEVEL_NODE)
3446
3447     # if vg_name not None, checks given volume group on all nodes
3448     if self.op.vg_name:
3449       vglist = self.rpc.call_vg_list(node_list)
3450       for node in node_list:
3451         msg = vglist[node].fail_msg
3452         if msg:
3453           # ignoring down node
3454           self.LogWarning("Error while gathering data on node %s"
3455                           " (ignoring node): %s", node, msg)
3456           continue
3457         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3458                                               self.op.vg_name,
3459                                               constants.MIN_VG_SIZE)
3460         if vgstatus:
3461           raise errors.OpPrereqError("Error on node '%s': %s" %
3462                                      (node, vgstatus), errors.ECODE_ENVIRON)
3463
3464     if self.op.drbd_helper:
3465       # checks given drbd helper on all nodes
3466       helpers = self.rpc.call_drbd_helper(node_list)
3467       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3468         if ninfo.offline:
3469           self.LogInfo("Not checking drbd helper on offline node %s", node)
3470           continue
3471         msg = helpers[node].fail_msg
3472         if msg:
3473           raise errors.OpPrereqError("Error checking drbd helper on node"
3474                                      " '%s': %s" % (node, msg),
3475                                      errors.ECODE_ENVIRON)
3476         node_helper = helpers[node].payload
3477         if node_helper != self.op.drbd_helper:
3478           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3479                                      (node, node_helper), errors.ECODE_ENVIRON)
3480
3481     self.cluster = cluster = self.cfg.GetClusterInfo()
3482     # validate params changes
3483     if self.op.beparams:
3484       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3485       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3486
3487     if self.op.ndparams:
3488       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3489       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3490
3491       # TODO: we need a more general way to handle resetting
3492       # cluster-level parameters to default values
3493       if self.new_ndparams["oob_program"] == "":
3494         self.new_ndparams["oob_program"] = \
3495             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3496
3497     if self.op.nicparams:
3498       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3499       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3500       objects.NIC.CheckParameterSyntax(self.new_nicparams)
3501       nic_errors = []
3502
3503       # check all instances for consistency
3504       for instance in self.cfg.GetAllInstancesInfo().values():
3505         for nic_idx, nic in enumerate(instance.nics):
3506           params_copy = copy.deepcopy(nic.nicparams)
3507           params_filled = objects.FillDict(self.new_nicparams, params_copy)
3508
3509           # check parameter syntax
3510           try:
3511             objects.NIC.CheckParameterSyntax(params_filled)
3512           except errors.ConfigurationError, err:
3513             nic_errors.append("Instance %s, nic/%d: %s" %
3514                               (instance.name, nic_idx, err))
3515
3516           # if we're moving instances to routed, check that they have an ip
3517           target_mode = params_filled[constants.NIC_MODE]
3518           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3519             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3520                               " address" % (instance.name, nic_idx))
3521       if nic_errors:
3522         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3523                                    "\n".join(nic_errors))
3524
3525     # hypervisor list/parameters
3526     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3527     if self.op.hvparams:
3528       for hv_name, hv_dict in self.op.hvparams.items():
3529         if hv_name not in self.new_hvparams:
3530           self.new_hvparams[hv_name] = hv_dict
3531         else:
3532           self.new_hvparams[hv_name].update(hv_dict)
3533
3534     # os hypervisor parameters
3535     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3536     if self.op.os_hvp:
3537       for os_name, hvs in self.op.os_hvp.items():
3538         if os_name not in self.new_os_hvp:
3539           self.new_os_hvp[os_name] = hvs
3540         else:
3541           for hv_name, hv_dict in hvs.items():
3542             if hv_name not in self.new_os_hvp[os_name]:
3543               self.new_os_hvp[os_name][hv_name] = hv_dict
3544             else:
3545               self.new_os_hvp[os_name][hv_name].update(hv_dict)
3546
3547     # os parameters
3548     self.new_osp = objects.FillDict(cluster.osparams, {})
3549     if self.op.osparams:
3550       for os_name, osp in self.op.osparams.items():
3551         if os_name not in self.new_osp:
3552           self.new_osp[os_name] = {}
3553
3554         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3555                                                   use_none=True)
3556
3557         if not self.new_osp[os_name]:
3558           # we removed all parameters
3559           del self.new_osp[os_name]
3560         else:
3561           # check the parameter validity (remote check)
3562           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3563                          os_name, self.new_osp[os_name])
3564
3565     # changes to the hypervisor list
3566     if self.op.enabled_hypervisors is not None:
3567       self.hv_list = self.op.enabled_hypervisors
3568       for hv in self.hv_list:
3569         # if the hypervisor doesn't already exist in the cluster
3570         # hvparams, we initialize it to empty, and then (in both
3571         # cases) we make sure to fill the defaults, as we might not
3572         # have a complete defaults list if the hypervisor wasn't
3573         # enabled before
3574         if hv not in new_hvp:
3575           new_hvp[hv] = {}
3576         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3577         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3578     else:
3579       self.hv_list = cluster.enabled_hypervisors
3580
3581     if self.op.hvparams or self.op.enabled_hypervisors is not None:
3582       # either the enabled list has changed, or the parameters have, validate
3583       for hv_name, hv_params in self.new_hvparams.items():
3584         if ((self.op.hvparams and hv_name in self.op.hvparams) or
3585             (self.op.enabled_hypervisors and
3586              hv_name in self.op.enabled_hypervisors)):
3587           # either this is a new hypervisor, or its parameters have changed
3588           hv_class = hypervisor.GetHypervisor(hv_name)
3589           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3590           hv_class.CheckParameterSyntax(hv_params)
3591           _CheckHVParams(self, node_list, hv_name, hv_params)
3592
3593     if self.op.os_hvp:
3594       # no need to check any newly-enabled hypervisors, since the
3595       # defaults have already been checked in the above code-block
3596       for os_name, os_hvp in self.new_os_hvp.items():
3597         for hv_name, hv_params in os_hvp.items():
3598           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3599           # we need to fill in the new os_hvp on top of the actual hv_p
3600           cluster_defaults = self.new_hvparams.get(hv_name, {})
3601           new_osp = objects.FillDict(cluster_defaults, hv_params)
3602           hv_class = hypervisor.GetHypervisor(hv_name)
3603           hv_class.CheckParameterSyntax(new_osp)
3604           _CheckHVParams(self, node_list, hv_name, new_osp)
3605
3606     if self.op.default_iallocator:
3607       alloc_script = utils.FindFile(self.op.default_iallocator,
3608                                     constants.IALLOCATOR_SEARCH_PATH,
3609                                     os.path.isfile)
3610       if alloc_script is None:
3611         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3612                                    " specified" % self.op.default_iallocator,
3613                                    errors.ECODE_INVAL)
3614
3615   def Exec(self, feedback_fn):
3616     """Change the parameters of the cluster.
3617
3618     """
3619     if self.op.vg_name is not None:
3620       new_volume = self.op.vg_name
3621       if not new_volume:
3622         new_volume = None
3623       if new_volume != self.cfg.GetVGName():
3624         self.cfg.SetVGName(new_volume)
3625       else:
3626         feedback_fn("Cluster LVM configuration already in desired"
3627                     " state, not changing")
3628     if self.op.drbd_helper is not None:
3629       new_helper = self.op.drbd_helper
3630       if not new_helper:
3631         new_helper = None
3632       if new_helper != self.cfg.GetDRBDHelper():
3633         self.cfg.SetDRBDHelper(new_helper)
3634       else:
3635         feedback_fn("Cluster DRBD helper already in desired state,"
3636                     " not changing")
3637     if self.op.hvparams:
3638       self.cluster.hvparams = self.new_hvparams
3639     if self.op.os_hvp:
3640       self.cluster.os_hvp = self.new_os_hvp
3641     if self.op.enabled_hypervisors is not None:
3642       self.cluster.hvparams = self.new_hvparams
3643       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3644     if self.op.beparams:
3645       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3646     if self.op.nicparams:
3647       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3648     if self.op.osparams:
3649       self.cluster.osparams = self.new_osp
3650     if self.op.ndparams:
3651       self.cluster.ndparams = self.new_ndparams
3652
3653     if self.op.candidate_pool_size is not None:
3654       self.cluster.candidate_pool_size = self.op.candidate_pool_size
3655       # we need to update the pool size here, otherwise the save will fail
3656       _AdjustCandidatePool(self, [])
3657
3658     if self.op.maintain_node_health is not None:
3659       self.cluster.maintain_node_health = self.op.maintain_node_health
3660
3661     if self.op.prealloc_wipe_disks is not None:
3662       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3663
3664     if self.op.add_uids is not None:
3665       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3666
3667     if self.op.remove_uids is not None:
3668       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3669
3670     if self.op.uid_pool is not None:
3671       self.cluster.uid_pool = self.op.uid_pool
3672
3673     if self.op.default_iallocator is not None:
3674       self.cluster.default_iallocator = self.op.default_iallocator
3675
3676     if self.op.reserved_lvs is not None:
3677       self.cluster.reserved_lvs = self.op.reserved_lvs
3678
3679     def helper_os(aname, mods, desc):
3680       desc += " OS list"
3681       lst = getattr(self.cluster, aname)
3682       for key, val in mods:
3683         if key == constants.DDM_ADD:
3684           if val in lst:
3685             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3686           else:
3687             lst.append(val)
3688         elif key == constants.DDM_REMOVE:
3689           if val in lst:
3690             lst.remove(val)
3691           else:
3692             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3693         else:
3694           raise errors.ProgrammerError("Invalid modification '%s'" % key)
3695
3696     if self.op.hidden_os:
3697       helper_os("hidden_os", self.op.hidden_os, "hidden")
3698
3699     if self.op.blacklisted_os:
3700       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3701
3702     if self.op.master_netdev:
3703       master = self.cfg.GetMasterNode()
3704       feedback_fn("Shutting down master ip on the current netdev (%s)" %
3705                   self.cluster.master_netdev)
3706       result = self.rpc.call_node_stop_master(master, False)
3707       result.Raise("Could not disable the master ip")
3708       feedback_fn("Changing master_netdev from %s to %s" %
3709                   (self.cluster.master_netdev, self.op.master_netdev))
3710       self.cluster.master_netdev = self.op.master_netdev
3711
3712     self.cfg.Update(self.cluster, feedback_fn)
3713
3714     if self.op.master_netdev:
3715       feedback_fn("Starting the master ip on the new master netdev (%s)" %
3716                   self.op.master_netdev)
3717       result = self.rpc.call_node_start_master(master, False, False)
3718       if result.fail_msg:
3719         self.LogWarning("Could not re-enable the master ip on"
3720                         " the master, please restart manually: %s",
3721                         result.fail_msg)
3722
3723
3724 def _UploadHelper(lu, nodes, fname):
3725   """Helper for uploading a file and showing warnings.
3726
3727   """
3728   if os.path.exists(fname):
3729     result = lu.rpc.call_upload_file(nodes, fname)
3730     for to_node, to_result in result.items():
3731       msg = to_result.fail_msg
3732       if msg:
3733         msg = ("Copy of file %s to node %s failed: %s" %
3734                (fname, to_node, msg))
3735         lu.proc.LogWarning(msg)
3736
3737
3738 def _ComputeAncillaryFiles(cluster, redist):
3739   """Compute files external to Ganeti which need to be consistent.
3740
3741   @type redist: boolean
3742   @param redist: Whether to include files which need to be redistributed
3743
3744   """
3745   # Compute files for all nodes
3746   files_all = set([
3747     constants.SSH_KNOWN_HOSTS_FILE,
3748     constants.CONFD_HMAC_KEY,
3749     constants.CLUSTER_DOMAIN_SECRET_FILE,
3750     ])
3751
3752   if not redist:
3753     files_all.update(constants.ALL_CERT_FILES)
3754     files_all.update(ssconf.SimpleStore().GetFileList())
3755   else:
3756     # we need to ship at least the RAPI certificate
3757     files_all.add(constants.RAPI_CERT_FILE)
3758
3759   if cluster.modify_etc_hosts:
3760     files_all.add(constants.ETC_HOSTS)
3761
3762   # Files which must either exist on all nodes or on none
3763   files_all_opt = set([
3764     constants.RAPI_USERS_FILE,
3765     ])
3766
3767   # Files which should only be on master candidates
3768   files_mc = set()
3769   if not redist:
3770     files_mc.add(constants.CLUSTER_CONF_FILE)
3771
3772   # Files which should only be on VM-capable nodes
3773   files_vm = set(filename
3774     for hv_name in cluster.enabled_hypervisors
3775     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3776
3777   # Filenames must be unique
3778   assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3779           sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3780          "Found file listed in more than one file list"
3781
3782   return (files_all, files_all_opt, files_mc, files_vm)
3783
3784
3785 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3786   """Distribute additional files which are part of the cluster configuration.
3787
3788   ConfigWriter takes care of distributing the config and ssconf files, but
3789   there are more files which should be distributed to all nodes. This function
3790   makes sure those are copied.
3791
3792   @param lu: calling logical unit
3793   @param additional_nodes: list of nodes not in the config to distribute to
3794   @type additional_vm: boolean
3795   @param additional_vm: whether the additional nodes are vm-capable or not
3796
3797   """
3798   # Gather target nodes
3799   cluster = lu.cfg.GetClusterInfo()
3800   master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3801
3802   online_nodes = lu.cfg.GetOnlineNodeList()
3803   vm_nodes = lu.cfg.GetVmCapableNodeList()
3804
3805   if additional_nodes is not None:
3806     online_nodes.extend(additional_nodes)
3807     if additional_vm:
3808       vm_nodes.extend(additional_nodes)
3809
3810   # Never distribute to master node
3811   for nodelist in [online_nodes, vm_nodes]:
3812     if master_info.name in nodelist:
3813       nodelist.remove(master_info.name)
3814
3815   # Gather file lists
3816   (files_all, files_all_opt, files_mc, files_vm) = \
3817     _ComputeAncillaryFiles(cluster, True)
3818
3819   # Never re-distribute configuration file from here
3820   assert not (constants.CLUSTER_CONF_FILE in files_all or
3821               constants.CLUSTER_CONF_FILE in files_vm)
3822   assert not files_mc, "Master candidates not handled in this function"
3823
3824   filemap = [
3825     (online_nodes, files_all),
3826     (online_nodes, files_all_opt),
3827     (vm_nodes, files_vm),
3828     ]
3829
3830   # Upload the files
3831   for (node_list, files) in filemap:
3832     for fname in files:
3833       _UploadHelper(lu, node_list, fname)
3834
3835
3836 class LUClusterRedistConf(NoHooksLU):
3837   """Force the redistribution of cluster configuration.
3838
3839   This is a very simple LU.
3840
3841   """
3842   REQ_BGL = False
3843
3844   def ExpandNames(self):
3845     self.needed_locks = {
3846       locking.LEVEL_NODE: locking.ALL_SET,
3847     }
3848     self.share_locks[locking.LEVEL_NODE] = 1
3849
3850   def Exec(self, feedback_fn):
3851     """Redistribute the configuration.
3852
3853     """
3854     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3855     _RedistributeAncillaryFiles(self)
3856
3857
3858 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3859   """Sleep and poll for an instance's disk to sync.
3860
3861   """
3862   if not instance.disks or disks is not None and not disks:
3863     return True
3864
3865   disks = _ExpandCheckDisks(instance, disks)
3866
3867   if not oneshot:
3868     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3869
3870   node = instance.primary_node
3871
3872   for dev in disks:
3873     lu.cfg.SetDiskID(dev, node)
3874
3875   # TODO: Convert to utils.Retry
3876
3877   retries = 0
3878   degr_retries = 10 # in seconds, as we sleep 1 second each time
3879   while True:
3880     max_time = 0
3881     done = True
3882     cumul_degraded = False
3883     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3884     msg = rstats.fail_msg
3885     if msg:
3886       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3887       retries += 1
3888       if retries >= 10:
3889         raise errors.RemoteError("Can't contact node %s for mirror data,"
3890                                  " aborting." % node)
3891       time.sleep(6)
3892       continue
3893     rstats = rstats.payload
3894     retries = 0
3895     for i, mstat in enumerate(rstats):
3896       if mstat is None:
3897         lu.LogWarning("Can't compute data for node %s/%s",
3898                            node, disks[i].iv_name)
3899         continue
3900
3901       cumul_degraded = (cumul_degraded or
3902                         (mstat.is_degraded and mstat.sync_percent is None))
3903       if mstat.sync_percent is not None:
3904         done = False
3905         if mstat.estimated_time is not None:
3906           rem_time = ("%s remaining (estimated)" %
3907                       utils.FormatSeconds(mstat.estimated_time))
3908           max_time = mstat.estimated_time
3909         else:
3910           rem_time = "no time estimate"
3911         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3912                         (disks[i].iv_name, mstat.sync_percent, rem_time))
3913
3914     # if we're done but degraded, let's do a few small retries, to
3915     # make sure we see a stable and not transient situation; therefore
3916     # we force restart of the loop
3917     if (done or oneshot) and cumul_degraded and degr_retries > 0:
3918       logging.info("Degraded disks found, %d retries left", degr_retries)
3919       degr_retries -= 1
3920       time.sleep(1)
3921       continue
3922
3923     if done or oneshot:
3924       break
3925
3926     time.sleep(min(60, max_time))
3927
3928   if done:
3929     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3930   return not cumul_degraded
3931
3932
3933 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3934   """Check that mirrors are not degraded.
3935
3936   The ldisk parameter, if True, will change the test from the
3937   is_degraded attribute (which represents overall non-ok status for
3938   the device(s)) to the ldisk (representing the local storage status).
3939
3940   """
3941   lu.cfg.SetDiskID(dev, node)
3942
3943   result = True
3944
3945   if on_primary or dev.AssembleOnSecondary():
3946     rstats = lu.rpc.call_blockdev_find(node, dev)
3947     msg = rstats.fail_msg
3948     if msg:
3949       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3950       result = False
3951     elif not rstats.payload:
3952       lu.LogWarning("Can't find disk on node %s", node)
3953       result = False
3954     else:
3955       if ldisk:
3956         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3957       else:
3958         result = result and not rstats.payload.is_degraded
3959
3960   if dev.children:
3961     for child in dev.children:
3962       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3963
3964   return result
3965
3966
3967 class LUOobCommand(NoHooksLU):
3968   """Logical unit for OOB handling.
3969
3970   """
3971   REQ_BGL = False
3972   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3973
3974   def ExpandNames(self):
3975     """Gather locks we need.
3976
3977     """
3978     if self.op.node_names:
3979       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3980       lock_names = self.op.node_names
3981     else:
3982       lock_names = locking.ALL_SET
3983
3984     self.needed_locks = {
3985       locking.LEVEL_NODE: lock_names,
3986       }
3987
3988   def CheckPrereq(self):
3989     """Check prerequisites.
3990
3991     This checks:
3992      - the node exists in the configuration
3993      - OOB is supported
3994
3995     Any errors are signaled by raising errors.OpPrereqError.
3996
3997     """
3998     self.nodes = []
3999     self.master_node = self.cfg.GetMasterNode()
4000
4001     assert self.op.power_delay >= 0.0
4002
4003     if self.op.node_names:
4004       if (self.op.command in self._SKIP_MASTER and
4005           self.master_node in self.op.node_names):
4006         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4007         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4008
4009         if master_oob_handler:
4010           additional_text = ("run '%s %s %s' if you want to operate on the"
4011                              " master regardless") % (master_oob_handler,
4012                                                       self.op.command,
4013                                                       self.master_node)
4014         else:
4015           additional_text = "it does not support out-of-band operations"
4016
4017         raise errors.OpPrereqError(("Operating on the master node %s is not"
4018                                     " allowed for %s; %s") %
4019                                    (self.master_node, self.op.command,
4020                                     additional_text), errors.ECODE_INVAL)
4021     else:
4022       self.op.node_names = self.cfg.GetNodeList()
4023       if self.op.command in self._SKIP_MASTER:
4024         self.op.node_names.remove(self.master_node)
4025
4026     if self.op.command in self._SKIP_MASTER:
4027       assert self.master_node not in self.op.node_names
4028
4029     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4030       if node is None:
4031         raise errors.OpPrereqError("Node %s not found" % node_name,
4032                                    errors.ECODE_NOENT)
4033       else:
4034         self.nodes.append(node)
4035
4036       if (not self.op.ignore_status and
4037           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4038         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4039                                     " not marked offline") % node_name,
4040                                    errors.ECODE_STATE)
4041
4042   def Exec(self, feedback_fn):
4043     """Execute OOB and return result if we expect any.
4044
4045     """
4046     master_node = self.master_node
4047     ret = []
4048
4049     for idx, node in enumerate(utils.NiceSort(self.nodes,
4050                                               key=lambda node: node.name)):
4051       node_entry = [(constants.RS_NORMAL, node.name)]
4052       ret.append(node_entry)
4053
4054       oob_program = _SupportsOob(self.cfg, node)
4055
4056       if not oob_program:
4057         node_entry.append((constants.RS_UNAVAIL, None))
4058         continue
4059
4060       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4061                    self.op.command, oob_program, node.name)
4062       result = self.rpc.call_run_oob(master_node, oob_program,
4063                                      self.op.command, node.name,
4064                                      self.op.timeout)
4065
4066       if result.fail_msg:
4067         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4068                         node.name, result.fail_msg)
4069         node_entry.append((constants.RS_NODATA, None))
4070       else:
4071         try:
4072           self._CheckPayload(result)
4073         except errors.OpExecError, err:
4074           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4075                           node.name, err)
4076           node_entry.append((constants.RS_NODATA, None))
4077         else:
4078           if self.op.command == constants.OOB_HEALTH:
4079             # For health we should log important events
4080             for item, status in result.payload:
4081               if status in [constants.OOB_STATUS_WARNING,
4082                             constants.OOB_STATUS_CRITICAL]:
4083                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4084                                 item, node.name, status)
4085
4086           if self.op.command == constants.OOB_POWER_ON:
4087             node.powered = True
4088           elif self.op.command == constants.OOB_POWER_OFF:
4089             node.powered = False
4090           elif self.op.command == constants.OOB_POWER_STATUS:
4091             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4092             if powered != node.powered:
4093               logging.warning(("Recorded power state (%s) of node '%s' does not"
4094                                " match actual power state (%s)"), node.powered,
4095                               node.name, powered)
4096
4097           # For configuration changing commands we should update the node
4098           if self.op.command in (constants.OOB_POWER_ON,
4099                                  constants.OOB_POWER_OFF):
4100             self.cfg.Update(node, feedback_fn)
4101
4102           node_entry.append((constants.RS_NORMAL, result.payload))
4103
4104           if (self.op.command == constants.OOB_POWER_ON and
4105               idx < len(self.nodes) - 1):
4106             time.sleep(self.op.power_delay)
4107
4108     return ret
4109
4110   def _CheckPayload(self, result):
4111     """Checks if the payload is valid.
4112
4113     @param result: RPC result
4114     @raises errors.OpExecError: If payload is not valid
4115
4116     """
4117     errs = []
4118     if self.op.command == constants.OOB_HEALTH:
4119       if not isinstance(result.payload, list):
4120         errs.append("command 'health' is expected to return a list but got %s" %
4121                     type(result.payload))
4122       else:
4123         for item, status in result.payload:
4124           if status not in constants.OOB_STATUSES:
4125             errs.append("health item '%s' has invalid status '%s'" %
4126                         (item, status))
4127
4128     if self.op.command == constants.OOB_POWER_STATUS:
4129       if not isinstance(result.payload, dict):
4130         errs.append("power-status is expected to return a dict but got %s" %
4131                     type(result.payload))
4132
4133     if self.op.command in [
4134         constants.OOB_POWER_ON,
4135         constants.OOB_POWER_OFF,
4136         constants.OOB_POWER_CYCLE,
4137         ]:
4138       if result.payload is not None:
4139         errs.append("%s is expected to not return payload but got '%s'" %
4140                     (self.op.command, result.payload))
4141
4142     if errs:
4143       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4144                                utils.CommaJoin(errs))
4145
4146
4147 class _OsQuery(_QueryBase):
4148   FIELDS = query.OS_FIELDS
4149
4150   def ExpandNames(self, lu):
4151     # Lock all nodes in shared mode
4152     # Temporary removal of locks, should be reverted later
4153     # TODO: reintroduce locks when they are lighter-weight
4154     lu.needed_locks = {}
4155     #self.share_locks[locking.LEVEL_NODE] = 1
4156     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4157
4158     # The following variables interact with _QueryBase._GetNames
4159     if self.names:
4160       self.wanted = self.names
4161     else:
4162       self.wanted = locking.ALL_SET
4163
4164     self.do_locking = self.use_locking
4165
4166   def DeclareLocks(self, lu, level):
4167     pass
4168
4169   @staticmethod
4170   def _DiagnoseByOS(rlist):
4171     """Remaps a per-node return list into an a per-os per-node dictionary
4172
4173     @param rlist: a map with node names as keys and OS objects as values
4174
4175     @rtype: dict
4176     @return: a dictionary with osnames as keys and as value another
4177         map, with nodes as keys and tuples of (path, status, diagnose,
4178         variants, parameters, api_versions) as values, eg::
4179
4180           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4181                                      (/srv/..., False, "invalid api")],
4182                            "node2": [(/srv/..., True, "", [], [])]}
4183           }
4184
4185     """
4186     all_os = {}
4187     # we build here the list of nodes that didn't fail the RPC (at RPC
4188     # level), so that nodes with a non-responding node daemon don't
4189     # make all OSes invalid
4190     good_nodes = [node_name for node_name in rlist
4191                   if not rlist[node_name].fail_msg]
4192     for node_name, nr in rlist.items():
4193       if nr.fail_msg or not nr.payload:
4194         continue
4195       for (name, path, status, diagnose, variants,
4196            params, api_versions) in nr.payload:
4197         if name not in all_os:
4198           # build a list of nodes for this os containing empty lists
4199           # for each node in node_list
4200           all_os[name] = {}
4201           for nname in good_nodes:
4202             all_os[name][nname] = []
4203         # convert params from [name, help] to (name, help)
4204         params = [tuple(v) for v in params]
4205         all_os[name][node_name].append((path, status, diagnose,
4206                                         variants, params, api_versions))
4207     return all_os
4208
4209   def _GetQueryData(self, lu):
4210     """Computes the list of nodes and their attributes.
4211
4212     """
4213     # Locking is not used
4214     assert not (compat.any(lu.glm.is_owned(level)
4215                            for level in locking.LEVELS
4216                            if level != locking.LEVEL_CLUSTER) or
4217                 self.do_locking or self.use_locking)
4218
4219     valid_nodes = [node.name
4220                    for node in lu.cfg.GetAllNodesInfo().values()
4221                    if not node.offline and node.vm_capable]
4222     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4223     cluster = lu.cfg.GetClusterInfo()
4224
4225     data = {}
4226
4227     for (os_name, os_data) in pol.items():
4228       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4229                           hidden=(os_name in cluster.hidden_os),
4230                           blacklisted=(os_name in cluster.blacklisted_os))
4231
4232       variants = set()
4233       parameters = set()
4234       api_versions = set()
4235
4236       for idx, osl in enumerate(os_data.values()):
4237         info.valid = bool(info.valid and osl and osl[0][1])
4238         if not info.valid:
4239           break
4240
4241         (node_variants, node_params, node_api) = osl[0][3:6]
4242         if idx == 0:
4243           # First entry
4244           variants.update(node_variants)
4245           parameters.update(node_params)
4246           api_versions.update(node_api)
4247         else:
4248           # Filter out inconsistent values
4249           variants.intersection_update(node_variants)
4250           parameters.intersection_update(node_params)
4251           api_versions.intersection_update(node_api)
4252
4253       info.variants = list(variants)
4254       info.parameters = list(parameters)
4255       info.api_versions = list(api_versions)
4256
4257       data[os_name] = info
4258
4259     # Prepare data in requested order
4260     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4261             if name in data]
4262
4263
4264 class LUOsDiagnose(NoHooksLU):
4265   """Logical unit for OS diagnose/query.
4266
4267   """
4268   REQ_BGL = False
4269
4270   @staticmethod
4271   def _BuildFilter(fields, names):
4272     """Builds a filter for querying OSes.
4273
4274     """
4275     name_filter = qlang.MakeSimpleFilter("name", names)
4276
4277     # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4278     # respective field is not requested
4279     status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4280                      for fname in ["hidden", "blacklisted"]
4281                      if fname not in fields]
4282     if "valid" not in fields:
4283       status_filter.append([qlang.OP_TRUE, "valid"])
4284
4285     if status_filter:
4286       status_filter.insert(0, qlang.OP_AND)
4287     else:
4288       status_filter = None
4289
4290     if name_filter and status_filter:
4291       return [qlang.OP_AND, name_filter, status_filter]
4292     elif name_filter:
4293       return name_filter
4294     else:
4295       return status_filter
4296
4297   def CheckArguments(self):
4298     self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4299                        self.op.output_fields, False)
4300
4301   def ExpandNames(self):
4302     self.oq.ExpandNames(self)
4303
4304   def Exec(self, feedback_fn):
4305     return self.oq.OldStyleQuery(self)
4306
4307
4308 class LUNodeRemove(LogicalUnit):
4309   """Logical unit for removing a node.
4310
4311   """
4312   HPATH = "node-remove"
4313   HTYPE = constants.HTYPE_NODE
4314
4315   def BuildHooksEnv(self):
4316     """Build hooks env.
4317
4318     This doesn't run on the target node in the pre phase as a failed
4319     node would then be impossible to remove.
4320
4321     """
4322     return {
4323       "OP_TARGET": self.op.node_name,
4324       "NODE_NAME": self.op.node_name,
4325       }
4326
4327   def BuildHooksNodes(self):
4328     """Build hooks nodes.
4329
4330     """
4331     all_nodes = self.cfg.GetNodeList()
4332     try:
4333       all_nodes.remove(self.op.node_name)
4334     except ValueError:
4335       logging.warning("Node '%s', which is about to be removed, was not found"
4336                       " in the list of all nodes", self.op.node_name)
4337     return (all_nodes, all_nodes)
4338
4339   def CheckPrereq(self):
4340     """Check prerequisites.
4341
4342     This checks:
4343      - the node exists in the configuration
4344      - it does not have primary or secondary instances
4345      - it's not the master
4346
4347     Any errors are signaled by raising errors.OpPrereqError.
4348
4349     """
4350     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4351     node = self.cfg.GetNodeInfo(self.op.node_name)
4352     assert node is not None
4353
4354     masternode = self.cfg.GetMasterNode()
4355     if node.name == masternode:
4356       raise errors.OpPrereqError("Node is the master node, failover to another"
4357                                  " node is required", errors.ECODE_INVAL)
4358
4359     for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4360       if node.name in instance.all_nodes:
4361         raise errors.OpPrereqError("Instance %s is still running on the node,"
4362                                    " please remove first" % instance_name,
4363                                    errors.ECODE_INVAL)
4364     self.op.node_name = node.name
4365     self.node = node
4366
4367   def Exec(self, feedback_fn):
4368     """Removes the node from the cluster.
4369
4370     """
4371     node = self.node
4372     logging.info("Stopping the node daemon and removing configs from node %s",
4373                  node.name)
4374
4375     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4376
4377     # Promote nodes to master candidate as needed
4378     _AdjustCandidatePool(self, exceptions=[node.name])
4379     self.context.RemoveNode(node.name)
4380
4381     # Run post hooks on the node before it's removed
4382     _RunPostHook(self, node.name)
4383
4384     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4385     msg = result.fail_msg
4386     if msg:
4387       self.LogWarning("Errors encountered on the remote node while leaving"
4388                       " the cluster: %s", msg)
4389
4390     # Remove node from our /etc/hosts
4391     if self.cfg.GetClusterInfo().modify_etc_hosts:
4392       master_node = self.cfg.GetMasterNode()
4393       result = self.rpc.call_etc_hosts_modify(master_node,
4394                                               constants.ETC_HOSTS_REMOVE,
4395                                               node.name, None)
4396       result.Raise("Can't update hosts file with new host data")
4397       _RedistributeAncillaryFiles(self)
4398
4399
4400 class _NodeQuery(_QueryBase):
4401   FIELDS = query.NODE_FIELDS
4402
4403   def ExpandNames(self, lu):
4404     lu.needed_locks = {}
4405     lu.share_locks = _ShareAll()
4406
4407     if self.names:
4408       self.wanted = _GetWantedNodes(lu, self.names)
4409     else:
4410       self.wanted = locking.ALL_SET
4411
4412     self.do_locking = (self.use_locking and
4413                        query.NQ_LIVE in self.requested_data)
4414
4415     if self.do_locking:
4416       # If any non-static field is requested we need to lock the nodes
4417       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4418
4419   def DeclareLocks(self, lu, level):
4420     pass
4421
4422   def _GetQueryData(self, lu):
4423     """Computes the list of nodes and their attributes.
4424
4425     """
4426     all_info = lu.cfg.GetAllNodesInfo()
4427
4428     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4429
4430     # Gather data as requested
4431     if query.NQ_LIVE in self.requested_data:
4432       # filter out non-vm_capable nodes
4433       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4434
4435       node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4436                                         lu.cfg.GetHypervisorType())
4437       live_data = dict((name, nresult.payload)
4438                        for (name, nresult) in node_data.items()
4439                        if not nresult.fail_msg and nresult.payload)
4440     else:
4441       live_data = None
4442
4443     if query.NQ_INST in self.requested_data:
4444       node_to_primary = dict([(name, set()) for name in nodenames])
4445       node_to_secondary = dict([(name, set()) for name in nodenames])
4446
4447       inst_data = lu.cfg.GetAllInstancesInfo()
4448
4449       for inst in inst_data.values():
4450         if inst.primary_node in node_to_primary:
4451           node_to_primary[inst.primary_node].add(inst.name)
4452         for secnode in inst.secondary_nodes:
4453           if secnode in node_to_secondary:
4454             node_to_secondary[secnode].add(inst.name)
4455     else:
4456       node_to_primary = None
4457       node_to_secondary = None
4458
4459     if query.NQ_OOB in self.requested_data:
4460       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4461                          for name, node in all_info.iteritems())
4462     else:
4463       oob_support = None
4464
4465     if query.NQ_GROUP in self.requested_data:
4466       groups = lu.cfg.GetAllNodeGroupsInfo()
4467     else:
4468       groups = {}
4469
4470     return query.NodeQueryData([all_info[name] for name in nodenames],
4471                                live_data, lu.cfg.GetMasterNode(),
4472                                node_to_primary, node_to_secondary, groups,
4473                                oob_support, lu.cfg.GetClusterInfo())
4474
4475
4476 class LUNodeQuery(NoHooksLU):
4477   """Logical unit for querying nodes.
4478
4479   """
4480   # pylint: disable=W0142
4481   REQ_BGL = False
4482
4483   def CheckArguments(self):
4484     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4485                          self.op.output_fields, self.op.use_locking)
4486
4487   def ExpandNames(self):
4488     self.nq.ExpandNames(self)
4489
4490   def Exec(self, feedback_fn):
4491     return self.nq.OldStyleQuery(self)
4492
4493
4494 class LUNodeQueryvols(NoHooksLU):
4495   """Logical unit for getting volumes on node(s).
4496
4497   """
4498   REQ_BGL = False
4499   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4500   _FIELDS_STATIC = utils.FieldSet("node")
4501
4502   def CheckArguments(self):
4503     _CheckOutputFields(static=self._FIELDS_STATIC,
4504                        dynamic=self._FIELDS_DYNAMIC,
4505                        selected=self.op.output_fields)
4506
4507   def ExpandNames(self):
4508     self.needed_locks = {}
4509     self.share_locks[locking.LEVEL_NODE] = 1
4510     if not self.op.nodes:
4511       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4512     else:
4513       self.needed_locks[locking.LEVEL_NODE] = \
4514         _GetWantedNodes(self, self.op.nodes)
4515
4516   def Exec(self, feedback_fn):
4517     """Computes the list of nodes and their attributes.
4518
4519     """
4520     nodenames = self.owned_locks(locking.LEVEL_NODE)
4521     volumes = self.rpc.call_node_volumes(nodenames)
4522
4523     ilist = self.cfg.GetAllInstancesInfo()
4524     vol2inst = _MapInstanceDisksToNodes(ilist.values())
4525
4526     output = []
4527     for node in nodenames:
4528       nresult = volumes[node]
4529       if nresult.offline:
4530         continue
4531       msg = nresult.fail_msg
4532       if msg:
4533         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4534         continue
4535
4536       node_vols = sorted(nresult.payload,
4537                          key=operator.itemgetter("dev"))
4538
4539       for vol in node_vols:
4540         node_output = []
4541         for field in self.op.output_fields:
4542           if field == "node":
4543             val = node
4544           elif field == "phys":
4545             val = vol["dev"]
4546           elif field == "vg":
4547             val = vol["vg"]
4548           elif field == "name":
4549             val = vol["name"]
4550           elif field == "size":
4551             val = int(float(vol["size"]))
4552           elif field == "instance":
4553             val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4554           else:
4555             raise errors.ParameterError(field)
4556           node_output.append(str(val))
4557
4558         output.append(node_output)
4559
4560     return output
4561
4562
4563 class LUNodeQueryStorage(NoHooksLU):
4564   """Logical unit for getting information on storage units on node(s).
4565
4566   """
4567   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4568   REQ_BGL = False
4569
4570   def CheckArguments(self):
4571     _CheckOutputFields(static=self._FIELDS_STATIC,
4572                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4573                        selected=self.op.output_fields)
4574
4575   def ExpandNames(self):
4576     self.needed_locks = {}
4577     self.share_locks[locking.LEVEL_NODE] = 1
4578
4579     if self.op.nodes:
4580       self.needed_locks[locking.LEVEL_NODE] = \
4581         _GetWantedNodes(self, self.op.nodes)
4582     else:
4583       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4584
4585   def Exec(self, feedback_fn):
4586     """Computes the list of nodes and their attributes.
4587
4588     """
4589     self.nodes = self.owned_locks(locking.LEVEL_NODE)
4590
4591     # Always get name to sort by
4592     if constants.SF_NAME in self.op.output_fields:
4593       fields = self.op.output_fields[:]
4594     else:
4595       fields = [constants.SF_NAME] + self.op.output_fields
4596
4597     # Never ask for node or type as it's only known to the LU
4598     for extra in [constants.SF_NODE, constants.SF_TYPE]:
4599       while extra in fields:
4600         fields.remove(extra)
4601
4602     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4603     name_idx = field_idx[constants.SF_NAME]
4604
4605     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4606     data = self.rpc.call_storage_list(self.nodes,
4607                                       self.op.storage_type, st_args,
4608                                       self.op.name, fields)
4609
4610     result = []
4611
4612     for node in utils.NiceSort(self.nodes):
4613       nresult = data[node]
4614       if nresult.offline:
4615         continue
4616
4617       msg = nresult.fail_msg
4618       if msg:
4619         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4620         continue
4621
4622       rows = dict([(row[name_idx], row) for row in nresult.payload])
4623
4624       for name in utils.NiceSort(rows.keys()):
4625         row = rows[name]
4626
4627         out = []
4628
4629         for field in self.op.output_fields:
4630           if field == constants.SF_NODE:
4631             val = node
4632           elif field == constants.SF_TYPE:
4633             val = self.op.storage_type
4634           elif field in field_idx:
4635             val = row[field_idx[field]]
4636           else:
4637             raise errors.ParameterError(field)
4638
4639           out.append(val)
4640
4641         result.append(out)
4642
4643     return result
4644
4645
4646 class _InstanceQuery(_QueryBase):
4647   FIELDS = query.INSTANCE_FIELDS
4648
4649   def ExpandNames(self, lu):
4650     lu.needed_locks = {}
4651     lu.share_locks = _ShareAll()
4652
4653     if self.names:
4654       self.wanted = _GetWantedInstances(lu, self.names)
4655     else:
4656       self.wanted = locking.ALL_SET
4657
4658     self.do_locking = (self.use_locking and
4659                        query.IQ_LIVE in self.requested_data)
4660     if self.do_locking:
4661       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4662       lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4663       lu.needed_locks[locking.LEVEL_NODE] = []
4664       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4665
4666     self.do_grouplocks = (self.do_locking and
4667                           query.IQ_NODES in self.requested_data)
4668
4669   def DeclareLocks(self, lu, level):
4670     if self.do_locking:
4671       if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4672         assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4673
4674         # Lock all groups used by instances optimistically; this requires going
4675         # via the node before it's locked, requiring verification later on
4676         lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4677           set(group_uuid
4678               for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4679               for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4680       elif level == locking.LEVEL_NODE:
4681         lu._LockInstancesNodes() # pylint: disable=W0212
4682
4683   @staticmethod
4684   def _CheckGroupLocks(lu):
4685     owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4686     owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4687
4688     # Check if node groups for locked instances are still correct
4689     for instance_name in owned_instances:
4690       _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4691
4692   def _GetQueryData(self, lu):
4693     """Computes the list of instances and their attributes.
4694
4695     """
4696     if self.do_grouplocks:
4697       self._CheckGroupLocks(lu)
4698
4699     cluster = lu.cfg.GetClusterInfo()
4700     all_info = lu.cfg.GetAllInstancesInfo()
4701
4702     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4703
4704     instance_list = [all_info[name] for name in instance_names]
4705     nodes = frozenset(itertools.chain(*(inst.all_nodes
4706                                         for inst in instance_list)))
4707     hv_list = list(set([inst.hypervisor for inst in instance_list]))
4708     bad_nodes = []
4709     offline_nodes = []
4710     wrongnode_inst = set()
4711
4712     # Gather data as requested
4713     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4714       live_data = {}
4715       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4716       for name in nodes:
4717         result = node_data[name]
4718         if result.offline:
4719           # offline nodes will be in both lists
4720           assert result.fail_msg
4721           offline_nodes.append(name)
4722         if result.fail_msg:
4723           bad_nodes.append(name)
4724         elif result.payload:
4725           for inst in result.payload:
4726             if inst in all_info:
4727               if all_info[inst].primary_node == name:
4728                 live_data.update(result.payload)
4729               else:
4730                 wrongnode_inst.add(inst)
4731             else:
4732               # orphan instance; we don't list it here as we don't
4733               # handle this case yet in the output of instance listing
4734               logging.warning("Orphan instance '%s' found on node %s",
4735                               inst, name)
4736         # else no instance is alive
4737     else:
4738       live_data = {}
4739
4740     if query.IQ_DISKUSAGE in self.requested_data:
4741       disk_usage = dict((inst.name,
4742                          _ComputeDiskSize(inst.disk_template,
4743                                           [{constants.IDISK_SIZE: disk.size}
4744                                            for disk in inst.disks]))
4745                         for inst in instance_list)
4746     else:
4747       disk_usage = None
4748
4749     if query.IQ_CONSOLE in self.requested_data:
4750       consinfo = {}
4751       for inst in instance_list:
4752         if inst.name in live_data:
4753           # Instance is running
4754           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4755         else:
4756           consinfo[inst.name] = None
4757       assert set(consinfo.keys()) == set(instance_names)
4758     else:
4759       consinfo = None
4760
4761     if query.IQ_NODES in self.requested_data:
4762       node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4763                                             instance_list)))
4764       nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4765       groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4766                     for uuid in set(map(operator.attrgetter("group"),
4767                                         nodes.values())))
4768     else:
4769       nodes = None
4770       groups = None
4771
4772     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4773                                    disk_usage, offline_nodes, bad_nodes,
4774                                    live_data, wrongnode_inst, consinfo,
4775                                    nodes, groups)
4776
4777
4778 class LUQuery(NoHooksLU):
4779   """Query for resources/items of a certain kind.
4780
4781   """
4782   # pylint: disable=W0142
4783   REQ_BGL = False
4784
4785   def CheckArguments(self):
4786     qcls = _GetQueryImplementation(self.op.what)
4787
4788     self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)
4789
4790   def ExpandNames(self):
4791     self.impl.ExpandNames(self)
4792
4793   def DeclareLocks(self, level):
4794     self.impl.DeclareLocks(self, level)
4795
4796   def Exec(self, feedback_fn):
4797     return self.impl.NewStyleQuery(self)
4798
4799
4800 class LUQueryFields(NoHooksLU):
4801   """Query for resources/items of a certain kind.
4802
4803   """
4804   # pylint: disable=W0142
4805   REQ_BGL = False
4806
4807   def CheckArguments(self):
4808     self.qcls = _GetQueryImplementation(self.op.what)
4809
4810   def ExpandNames(self):
4811     self.needed_locks = {}
4812
4813   def Exec(self, feedback_fn):
4814     return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4815
4816
4817 class LUNodeModifyStorage(NoHooksLU):
4818   """Logical unit for modifying a storage volume on a node.
4819
4820   """
4821   REQ_BGL = False
4822
4823   def CheckArguments(self):
4824     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4825
4826     storage_type = self.op.storage_type
4827
4828     try:
4829       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4830     except KeyError:
4831       raise errors.OpPrereqError("Storage units of type '%s' can not be"
4832                                  " modified" % storage_type,
4833                                  errors.ECODE_INVAL)
4834
4835     diff = set(self.op.changes.keys()) - modifiable
4836     if diff:
4837       raise errors.OpPrereqError("The following fields can not be modified for"
4838                                  " storage units of type '%s': %r" %
4839                                  (storage_type, list(diff)),
4840                                  errors.ECODE_INVAL)
4841
4842   def ExpandNames(self):
4843     self.needed_locks = {
4844       locking.LEVEL_NODE: self.op.node_name,
4845       }
4846
4847   def Exec(self, feedback_fn):
4848     """Computes the list of nodes and their attributes.
4849
4850     """
4851     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4852     result = self.rpc.call_storage_modify(self.op.node_name,
4853                                           self.op.storage_type, st_args,
4854                                           self.op.name, self.op.changes)
4855     result.Raise("Failed to modify storage unit '%s' on %s" %
4856                  (self.op.name, self.op.node_name))
4857
4858
4859 class LUNodeAdd(LogicalUnit):
4860   """Logical unit for adding node to the cluster.
4861
4862   """
4863   HPATH = "node-add"
4864   HTYPE = constants.HTYPE_NODE
4865   _NFLAGS = ["master_capable", "vm_capable"]
4866
4867   def CheckArguments(self):
4868     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4869     # validate/normalize the node name
4870     self.hostname = netutils.GetHostname(name=self.op.node_name,
4871                                          family=self.primary_ip_family)
4872     self.op.node_name = self.hostname.name
4873
4874     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4875       raise errors.OpPrereqError("Cannot readd the master node",
4876                                  errors.ECODE_STATE)
4877
4878     if self.op.readd and self.op.group:
4879       raise errors.OpPrereqError("Cannot pass a node group when a node is"
4880                                  " being readded", errors.ECODE_INVAL)
4881
4882   def BuildHooksEnv(self):
4883     """Build hooks env.
4884
4885     This will run on all nodes before, and on all nodes + the new node after.
4886
4887     """
4888     return {
4889       "OP_TARGET": self.op.node_name,
4890       "NODE_NAME": self.op.node_name,
4891       "NODE_PIP": self.op.primary_ip,
4892       "NODE_SIP": self.op.secondary_ip,
4893       "MASTER_CAPABLE": str(self.op.master_capable),
4894       "VM_CAPABLE": str(self.op.vm_capable),
4895       }
4896
4897   def BuildHooksNodes(self):
4898     """Build hooks nodes.
4899
4900     """
4901     # Exclude added node
4902     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4903     post_nodes = pre_nodes + [self.op.node_name, ]
4904
4905     return (pre_nodes, post_nodes)
4906
4907   def CheckPrereq(self):
4908     """Check prerequisites.
4909
4910     This checks:
4911      - the new node is not already in the config
4912      - it is resolvable
4913      - its parameters (single/dual homed) matches the cluster
4914
4915     Any errors are signaled by raising errors.OpPrereqError.
4916
4917     """
4918     cfg = self.cfg
4919     hostname = self.hostname
4920     node = hostname.name
4921     primary_ip = self.op.primary_ip = hostname.ip
4922     if self.op.secondary_ip is None:
4923       if self.primary_ip_family == netutils.IP6Address.family:
4924         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4925                                    " IPv4 address must be given as secondary",
4926                                    errors.ECODE_INVAL)
4927       self.op.secondary_ip = primary_ip
4928
4929     secondary_ip = self.op.secondary_ip
4930     if not netutils.IP4Address.IsValid(secondary_ip):
4931       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4932                                  " address" % secondary_ip, errors.ECODE_INVAL)
4933
4934     node_list = cfg.GetNodeList()
4935     if not self.op.readd and node in node_list:
4936       raise errors.OpPrereqError("Node %s is already in the configuration" %
4937                                  node, errors.ECODE_EXISTS)
4938     elif self.op.readd and node not in node_list:
4939       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4940                                  errors.ECODE_NOENT)
4941
4942     self.changed_primary_ip = False
4943
4944     for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4945       if self.op.readd and node == existing_node_name:
4946         if existing_node.secondary_ip != secondary_ip:
4947           raise errors.OpPrereqError("Readded node doesn't have the same IP"
4948                                      " address configuration as before",
4949                                      errors.ECODE_INVAL)
4950         if existing_node.primary_ip != primary_ip:
4951           self.changed_primary_ip = True
4952
4953         continue
4954
4955       if (existing_node.primary_ip == primary_ip or
4956           existing_node.secondary_ip == primary_ip or
4957           existing_node.primary_ip == secondary_ip or
4958           existing_node.secondary_ip == secondary_ip):
4959         raise errors.OpPrereqError("New node ip address(es) conflict with"
4960                                    " existing node %s" % existing_node.name,
4961                                    errors.ECODE_NOTUNIQUE)
4962
4963     # After this 'if' block, None is no longer a valid value for the
4964     # _capable op attributes
4965     if self.op.readd:
4966       old_node = self.cfg.GetNodeInfo(node)
4967       assert old_node is not None, "Can't retrieve locked node %s" % node
4968       for attr in self._NFLAGS:
4969         if getattr(self.op, attr) is None:
4970           setattr(self.op, attr, getattr(old_node, attr))
4971     else:
4972       for attr in self._NFLAGS:
4973         if getattr(self.op, attr) is None:
4974           setattr(self.op, attr, True)
4975
4976     if self.op.readd and not self.op.vm_capable:
4977       pri, sec = cfg.GetNodeInstances(node)
4978       if pri or sec:
4979         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4980                                    " flag set to false, but it already holds"
4981                                    " instances" % node,
4982                                    errors.ECODE_STATE)
4983
4984     # check that the type of the node (single versus dual homed) is the
4985     # same as for the master
4986     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4987     master_singlehomed = myself.secondary_ip == myself.primary_ip
4988     newbie_singlehomed = secondary_ip == primary_ip
4989     if master_singlehomed != newbie_singlehomed:
4990       if master_singlehomed:
4991         raise errors.OpPrereqError("The master has no secondary ip but the"
4992                                    " new node has one",
4993                                    errors.ECODE_INVAL)
4994       else:
4995         raise errors.OpPrereqError("The master has a secondary ip but the"
4996                                    " new node doesn't have one",
4997                                    errors.ECODE_INVAL)
4998
4999     # checks reachability
5000     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5001       raise errors.OpPrereqError("Node not reachable by ping",
5002                                  errors.ECODE_ENVIRON)
5003
5004     if not newbie_singlehomed:
5005       # check reachability from my secondary ip to newbie's secondary ip
5006       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5007                            source=myself.secondary_ip):
5008         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5009                                    " based ping to node daemon port",
5010                                    errors.ECODE_ENVIRON)
5011
5012     if self.op.readd:
5013       exceptions = [node]
5014     else:
5015       exceptions = []
5016
5017     if self.op.master_capable:
5018       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5019     else:
5020       self.master_candidate = False
5021
5022     if self.op.readd:
5023       self.new_node = old_node
5024     else:
5025       node_group = cfg.LookupNodeGroup(self.op.group)
5026       self.new_node = objects.Node(name=node,
5027                                    primary_ip=primary_ip,
5028                                    secondary_ip=secondary_ip,
5029                                    master_candidate=self.master_candidate,
5030                                    offline=False, drained=False,
5031                                    group=node_group)
5032
5033     if self.op.ndparams:
5034       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5035
5036     # check connectivity
5037     result = self.rpc.call_version([self.new_node.name])[self.new_node.name]
5038     result.Raise("Can't get version information from node %s" % node)
5039     if constants.PROTOCOL_VERSION == result.payload:
5040       logging.info("Communication to node %s fine, sw version %s match",
5041                    node, result.payload)
5042     else:
5043       raise errors.OpPrereqError("Version mismatch master version %s,"
5044                                  " node version %s" %
5045                                  (constants.PROTOCOL_VERSION, result.payload),
5046                                  errors.ECODE_ENVIRON)
5047
5048   def Exec(self, feedback_fn):
5049     """Adds the new node to the cluster.
5050
5051     """
5052     new_node = self.new_node
5053     node = new_node.name
5054
5055     # We adding a new node so we assume it's powered
5056     new_node.powered = True
5057
5058     # for re-adds, reset the offline/drained/master-candidate flags;
5059     # we need to reset here, otherwise offline would prevent RPC calls
5060     # later in the procedure; this also means that if the re-add
5061     # fails, we are left with a non-offlined, broken node
5062     if self.op.readd:
5063       new_node.drained = new_node.offline = False # pylint: disable=W0201
5064       self.LogInfo("Readding a node, the offline/drained flags were reset")
5065       # if we demote the node, we do cleanup later in the procedure
5066       new_node.master_candidate = self.master_candidate
5067       if self.changed_primary_ip:
5068         new_node.primary_ip = self.op.primary_ip
5069
5070     # copy the master/vm_capable flags
5071     for attr in self._NFLAGS:
5072       setattr(new_node, attr, getattr(self.op, attr))
5073
5074     # notify the user about any possible mc promotion
5075     if new_node.master_candidate:
5076       self.LogInfo("Node will be a master candidate")
5077
5078     if self.op.ndparams:
5079       new_node.ndparams = self.op.ndparams
5080     else:
5081       new_node.ndparams = {}
5082
5083     # Add node to our /etc/hosts, and add key to known_hosts
5084     if self.cfg.GetClusterInfo().modify_etc_hosts:
5085       master_node = self.cfg.GetMasterNode()
5086       result = self.rpc.call_etc_hosts_modify(master_node,
5087                                               constants.ETC_HOSTS_ADD,
5088                                               self.hostname.name,
5089                                               self.hostname.ip)
5090       result.Raise("Can't update hosts file with new host data")
5091
5092     if new_node.secondary_ip != new_node.primary_ip:
5093       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5094                                False)
5095
5096     node_verify_list = [self.cfg.GetMasterNode()]
5097     node_verify_param = {
5098       constants.NV_NODELIST: ([node], {}),
5099       # TODO: do a node-net-test as well?
5100     }
5101
5102     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5103                                        self.cfg.GetClusterName())
5104     for verifier in node_verify_list:
5105       result[verifier].Raise("Cannot communicate with node %s" % verifier)
5106       nl_payload = result[verifier].payload[constants.NV_NODELIST]
5107       if nl_payload:
5108         for failed in nl_payload:
5109           feedback_fn("ssh/hostname verification failed"
5110                       " (checking from %s): %s" %
5111                       (verifier, nl_payload[failed]))
5112         raise errors.OpExecError("ssh/hostname verification failed")
5113
5114     if self.op.readd:
5115       _RedistributeAncillaryFiles(self)
5116       self.context.ReaddNode(new_node)
5117       # make sure we redistribute the config
5118       self.cfg.Update(new_node, feedback_fn)
5119       # and make sure the new node will not have old files around
5120       if not new_node.master_candidate:
5121         result = self.rpc.call_node_demote_from_mc(new_node.name)
5122         msg = result.fail_msg
5123         if msg:
5124           self.LogWarning("Node failed to demote itself from master"
5125                           " candidate status: %s" % msg)
5126     else:
5127       _RedistributeAncillaryFiles(self, additional_nodes=[node],
5128                                   additional_vm=self.op.vm_capable)
5129       self.context.AddNode(new_node, self.proc.GetECId())
5130
5131
5132 class LUNodeSetParams(LogicalUnit):
5133   """Modifies the parameters of a node.
5134
5135   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5136       to the node role (as _ROLE_*)
5137   @cvar _R2F: a dictionary from node role to tuples of flags
5138   @cvar _FLAGS: a list of attribute names corresponding to the flags
5139
5140   """
5141   HPATH = "node-modify"
5142   HTYPE = constants.HTYPE_NODE
5143   REQ_BGL = False
5144   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5145   _F2R = {
5146     (True, False, False): _ROLE_CANDIDATE,
5147     (False, True, False): _ROLE_DRAINED,
5148     (False, False, True): _ROLE_OFFLINE,
5149     (False, False, False): _ROLE_REGULAR,
5150     }
5151   _R2F = dict((v, k) for k, v in _F2R.items())
5152   _FLAGS = ["master_candidate", "drained", "offline"]
5153
5154   def CheckArguments(self):
5155     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5156     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5157                 self.op.master_capable, self.op.vm_capable,
5158                 self.op.secondary_ip, self.op.ndparams]
5159     if all_mods.count(None) == len(all_mods):
5160       raise errors.OpPrereqError("Please pass at least one modification",
5161                                  errors.ECODE_INVAL)
5162     if all_mods.count(True) > 1:
5163       raise errors.OpPrereqError("Can't set the node into more than one"
5164                                  " state at the same time",
5165                                  errors.ECODE_INVAL)
5166
5167     # Boolean value that tells us whether we might be demoting from MC
5168     self.might_demote = (self.op.master_candidate == False or
5169                          self.op.offline == True or
5170                          self.op.drained == True or
5171                          self.op.master_capable == False)
5172
5173     if self.op.secondary_ip:
5174       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5175         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5176                                    " address" % self.op.secondary_ip,
5177                                    errors.ECODE_INVAL)
5178
5179     self.lock_all = self.op.auto_promote and self.might_demote
5180     self.lock_instances = self.op.secondary_ip is not None
5181
5182   def ExpandNames(self):
5183     if self.lock_all:
5184       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5185     else:
5186       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5187
5188     if self.lock_instances:
5189       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5190
5191   def DeclareLocks(self, level):
5192     # If we have locked all instances, before waiting to lock nodes, release
5193     # all the ones living on nodes unrelated to the current operation.
5194     if level == locking.LEVEL_NODE and self.lock_instances:
5195       self.affected_instances = []
5196       if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5197         instances_keep = []
5198
5199         # Build list of instances to release
5200         locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5201         for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5202           if (instance.disk_template in constants.DTS_INT_MIRROR and
5203               self.op.node_name in instance.all_nodes):
5204             instances_keep.append(instance_name)
5205             self.affected_instances.append(instance)
5206
5207         _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5208
5209         assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5210                 set(instances_keep))
5211
5212   def BuildHooksEnv(self):
5213     """Build hooks env.
5214
5215     This runs on the master node.
5216
5217     """
5218     return {
5219       "OP_TARGET": self.op.node_name,
5220       "MASTER_CANDIDATE": str(self.op.master_candidate),
5221       "OFFLINE": str(self.op.offline),
5222       "DRAINED": str(self.op.drained),
5223       "MASTER_CAPABLE": str(self.op.master_capable),
5224       "VM_CAPABLE": str(self.op.vm_capable),
5225       }
5226
5227   def BuildHooksNodes(self):
5228     """Build hooks nodes.
5229
5230     """
5231     nl = [self.cfg.GetMasterNode(), self.op.node_name]
5232     return (nl, nl)
5233
5234   def CheckPrereq(self):
5235     """Check prerequisites.
5236
5237     This only checks the instance list against the existing names.
5238
5239     """
5240     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5241
5242     if (self.op.master_candidate is not None or
5243         self.op.drained is not None or
5244         self.op.offline is not None):
5245       # we can't change the master's node flags
5246       if self.op.node_name == self.cfg.GetMasterNode():
5247         raise errors.OpPrereqError("The master role can be changed"
5248                                    " only via master-failover",
5249                                    errors.ECODE_INVAL)
5250
5251     if self.op.master_candidate and not node.master_capable:
5252       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5253                                  " it a master candidate" % node.name,
5254                                  errors.ECODE_STATE)
5255
5256     if self.op.vm_capable == False:
5257       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5258       if ipri or isec:
5259         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5260                                    " the vm_capable flag" % node.name,
5261                                    errors.ECODE_STATE)
5262
5263     if node.master_candidate and self.might_demote and not self.lock_all:
5264       assert not self.op.auto_promote, "auto_promote set but lock_all not"
5265       # check if after removing the current node, we're missing master
5266       # candidates
5267       (mc_remaining, mc_should, _) = \
5268           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5269       if mc_remaining < mc_should:
5270         raise errors.OpPrereqError("Not enough master candidates, please"
5271                                    " pass auto promote option to allow"
5272                                    " promotion", errors.ECODE_STATE)
5273
5274     self.old_flags = old_flags = (node.master_candidate,
5275                                   node.drained, node.offline)
5276     assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5277     self.old_role = old_role = self._F2R[old_flags]
5278
5279     # Check for ineffective changes
5280     for attr in self._FLAGS:
5281       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5282         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5283         setattr(self.op, attr, None)
5284
5285     # Past this point, any flag change to False means a transition
5286     # away from the respective state, as only real changes are kept
5287
5288     # TODO: We might query the real power state if it supports OOB
5289     if _SupportsOob(self.cfg, node):
5290       if self.op.offline is False and not (node.powered or
5291                                            self.op.powered == True):
5292         raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5293                                     " offline status can be reset") %
5294                                    self.op.node_name)
5295     elif self.op.powered is not None:
5296       raise errors.OpPrereqError(("Unable to change powered state for node %s"
5297                                   " as it does not support out-of-band"
5298                                   " handling") % self.op.node_name)
5299
5300     # If we're being deofflined/drained, we'll MC ourself if needed
5301     if (self.op.drained == False or self.op.offline == False or
5302         (self.op.master_capable and not node.master_capable)):
5303       if _DecideSelfPromotion(self):
5304         self.op.master_candidate = True
5305         self.LogInfo("Auto-promoting node to master candidate")
5306
5307     # If we're no longer master capable, we'll demote ourselves from MC
5308     if self.op.master_capable == False and node.master_candidate:
5309       self.LogInfo("Demoting from master candidate")
5310       self.op.master_candidate = False
5311
5312     # Compute new role
5313     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5314     if self.op.master_candidate:
5315       new_role = self._ROLE_CANDIDATE
5316     elif self.op.drained:
5317       new_role = self._ROLE_DRAINED
5318     elif self.op.offline:
5319       new_role = self._ROLE_OFFLINE
5320     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5321       # False is still in new flags, which means we're un-setting (the
5322       # only) True flag
5323       new_role = self._ROLE_REGULAR
5324     else: # no new flags, nothing, keep old role
5325       new_role = old_role
5326
5327     self.new_role = new_role
5328
5329     if old_role == self._ROLE_OFFLINE and new_role != old_role:
5330       # Trying to transition out of offline status
5331       result = self.rpc.call_version([node.name])[node.name]
5332       if result.fail_msg:
5333         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5334                                    " to report its version: %s" %
5335                                    (node.name, result.fail_msg),
5336                                    errors.ECODE_STATE)
5337       else:
5338         self.LogWarning("Transitioning node from offline to online state"
5339                         " without using re-add. Please make sure the node"
5340                         " is healthy!")
5341
5342     if self.op.secondary_ip:
5343       # Ok even without locking, because this can't be changed by any LU
5344       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5345       master_singlehomed = master.secondary_ip == master.primary_ip
5346       if master_singlehomed and self.op.secondary_ip:
5347         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5348                                    " homed cluster", errors.ECODE_INVAL)
5349
5350       if node.offline:
5351         if self.affected_instances:
5352           raise errors.OpPrereqError("Cannot change secondary ip: offline"
5353                                      " node has instances (%s) configured"
5354                                      " to use it" % self.affected_instances)
5355       else:
5356         # On online nodes, check that no instances are running, and that
5357         # the node has the new ip and we can reach it.
5358         for instance in self.affected_instances:
5359           _CheckInstanceDown(self, instance, "cannot change secondary ip")
5360
5361         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5362         if master.name != node.name:
5363           # check reachability from master secondary ip to new secondary ip
5364           if not netutils.TcpPing(self.op.secondary_ip,
5365                                   constants.DEFAULT_NODED_PORT,
5366                                   source=master.secondary_ip):
5367             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5368                                        " based ping to node daemon port",
5369                                        errors.ECODE_ENVIRON)
5370
5371     if self.op.ndparams:
5372       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5373       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5374       self.new_ndparams = new_ndparams
5375
5376   def Exec(self, feedback_fn):
5377     """Modifies a node.
5378
5379     """
5380     node = self.node
5381     old_role = self.old_role
5382     new_role = self.new_role
5383
5384     result = []
5385
5386     if self.op.ndparams:
5387       node.ndparams = self.new_ndparams
5388
5389     if self.op.powered is not None:
5390       node.powered = self.op.powered
5391
5392     for attr in ["master_capable", "vm_capable"]:
5393       val = getattr(self.op, attr)
5394       if val is not None:
5395         setattr(node, attr, val)
5396         result.append((attr, str(val)))
5397
5398     if new_role != old_role:
5399       # Tell the node to demote itself, if no longer MC and not offline
5400       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5401         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5402         if msg:
5403           self.LogWarning("Node failed to demote itself: %s", msg)
5404
5405       new_flags = self._R2F[new_role]
5406       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5407         if of != nf:
5408           result.append((desc, str(nf)))
5409       (node.master_candidate, node.drained, node.offline) = new_flags
5410
5411       # we locked all nodes, we adjust the CP before updating this node
5412       if self.lock_all:
5413         _AdjustCandidatePool(self, [node.name])
5414
5415     if self.op.secondary_ip:
5416       node.secondary_ip = self.op.secondary_ip
5417       result.append(("secondary_ip", self.op.secondary_ip))
5418
5419     # this will trigger configuration file update, if needed
5420     self.cfg.Update(node, feedback_fn)
5421
5422     # this will trigger job queue propagation or cleanup if the mc
5423     # flag changed
5424     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5425       self.context.ReaddNode(node)
5426
5427     return result
5428
5429
5430 class LUNodePowercycle(NoHooksLU):
5431   """Powercycles a node.
5432
5433   """
5434   REQ_BGL = False
5435
5436   def CheckArguments(self):
5437     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5438     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5439       raise errors.OpPrereqError("The node is the master and the force"
5440                                  " parameter was not set",
5441                                  errors.ECODE_INVAL)
5442
5443   def ExpandNames(self):
5444     """Locking for PowercycleNode.
5445
5446     This is a last-resort option and shouldn't block on other
5447     jobs. Therefore, we grab no locks.
5448
5449     """
5450     self.needed_locks = {}
5451
5452   def Exec(self, feedback_fn):
5453     """Reboots a node.
5454
5455     """
5456     result = self.rpc.call_node_powercycle(self.op.node_name,
5457                                            self.cfg.GetHypervisorType())
5458     result.Raise("Failed to schedule the reboot")
5459     return result.payload
5460
5461
5462 class LUClusterQuery(NoHooksLU):
5463   """Query cluster configuration.
5464
5465   """
5466   REQ_BGL = False
5467
5468   def ExpandNames(self):
5469     self.needed_locks = {}
5470
5471   def Exec(self, feedback_fn):
5472     """Return cluster config.
5473
5474     """
5475     cluster = self.cfg.GetClusterInfo()
5476     os_hvp = {}
5477
5478     # Filter just for enabled hypervisors
5479     for os_name, hv_dict in cluster.os_hvp.items():
5480       os_hvp[os_name] = {}
5481       for hv_name, hv_params in hv_dict.items():
5482         if hv_name in cluster.enabled_hypervisors:
5483           os_hvp[os_name][hv_name] = hv_params
5484
5485     # Convert ip_family to ip_version
5486     primary_ip_version = constants.IP4_VERSION
5487     if cluster.primary_ip_family == netutils.IP6Address.family:
5488       primary_ip_version = constants.IP6_VERSION
5489
5490     result = {
5491       "software_version": constants.RELEASE_VERSION,
5492       "protocol_version": constants.PROTOCOL_VERSION,
5493       "config_version": constants.CONFIG_VERSION,
5494       "os_api_version": max(constants.OS_API_VERSIONS),
5495       "export_version": constants.EXPORT_VERSION,
5496       "architecture": (platform.architecture()[0], platform.machine()),
5497       "name": cluster.cluster_name,
5498       "master": cluster.master_node,
5499       "default_hypervisor": cluster.enabled_hypervisors[0],
5500       "enabled_hypervisors": cluster.enabled_hypervisors,
5501       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5502                         for hypervisor_name in cluster.enabled_hypervisors]),
5503       "os_hvp": os_hvp,
5504       "beparams": cluster.beparams,
5505       "osparams": cluster.osparams,
5506       "nicparams": cluster.nicparams,
5507       "ndparams": cluster.ndparams,
5508       "candidate_pool_size": cluster.candidate_pool_size,
5509       "master_netdev": cluster.master_netdev,
5510       "volume_group_name": cluster.volume_group_name,
5511       "drbd_usermode_helper": cluster.drbd_usermode_helper,
5512       "file_storage_dir": cluster.file_storage_dir,
5513       "shared_file_storage_dir": cluster.shared_file_storage_dir,
5514       "maintain_node_health": cluster.maintain_node_health,
5515       "ctime": cluster.ctime,
5516       "mtime": cluster.mtime,
5517       "uuid": cluster.uuid,
5518       "tags": list(cluster.GetTags()),
5519       "uid_pool": cluster.uid_pool,
5520       "default_iallocator": cluster.default_iallocator,
5521       "reserved_lvs": cluster.reserved_lvs,
5522       "primary_ip_version": primary_ip_version,
5523       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5524       "hidden_os": cluster.hidden_os,
5525       "blacklisted_os": cluster.blacklisted_os,
5526       }
5527
5528     return result
5529
5530
5531 class LUClusterConfigQuery(NoHooksLU):
5532   """Return configuration values.
5533
5534   """
5535   REQ_BGL = False
5536   _FIELDS_DYNAMIC = utils.FieldSet()
5537   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5538                                   "watcher_pause", "volume_group_name")
5539
5540   def CheckArguments(self):
5541     _CheckOutputFields(static=self._FIELDS_STATIC,
5542                        dynamic=self._FIELDS_DYNAMIC,
5543                        selected=self.op.output_fields)
5544
5545   def ExpandNames(self):
5546     self.needed_locks = {}
5547
5548   def Exec(self, feedback_fn):
5549     """Dump a representation of the cluster config to the standard output.
5550
5551     """
5552     values = []
5553     for field in self.op.output_fields:
5554       if field == "cluster_name":
5555         entry = self.cfg.GetClusterName()
5556       elif field == "master_node":
5557         entry = self.cfg.GetMasterNode()
5558       elif field == "drain_flag":
5559         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5560       elif field == "watcher_pause":
5561         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5562       elif field == "volume_group_name":
5563         entry = self.cfg.GetVGName()
5564       else:
5565         raise errors.ParameterError(field)
5566       values.append(entry)
5567     return values
5568
5569
5570 class LUInstanceActivateDisks(NoHooksLU):
5571   """Bring up an instance's disks.
5572
5573   """
5574   REQ_BGL = False
5575
5576   def ExpandNames(self):
5577     self._ExpandAndLockInstance()
5578     self.needed_locks[locking.LEVEL_NODE] = []
5579     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5580
5581   def DeclareLocks(self, level):
5582     if level == locking.LEVEL_NODE:
5583       self._LockInstancesNodes()
5584
5585   def CheckPrereq(self):
5586     """Check prerequisites.
5587
5588     This checks that the instance is in the cluster.
5589
5590     """
5591     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5592     assert self.instance is not None, \
5593       "Cannot retrieve locked instance %s" % self.op.instance_name
5594     _CheckNodeOnline(self, self.instance.primary_node)
5595
5596   def Exec(self, feedback_fn):
5597     """Activate the disks.
5598
5599     """
5600     disks_ok, disks_info = \
5601               _AssembleInstanceDisks(self, self.instance,
5602                                      ignore_size=self.op.ignore_size)
5603     if not disks_ok:
5604       raise errors.OpExecError("Cannot activate block devices")
5605
5606     return disks_info
5607
5608
5609 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5610                            ignore_size=False):
5611   """Prepare the block devices for an instance.
5612
5613   This sets up the block devices on all nodes.
5614
5615   @type lu: L{LogicalUnit}
5616   @param lu: the logical unit on whose behalf we execute
5617   @type instance: L{objects.Instance}
5618   @param instance: the instance for whose disks we assemble
5619   @type disks: list of L{objects.Disk} or None
5620   @param disks: which disks to assemble (or all, if None)
5621   @type ignore_secondaries: boolean
5622   @param ignore_secondaries: if true, errors on secondary nodes
5623       won't result in an error return from the function
5624   @type ignore_size: boolean
5625   @param ignore_size: if true, the current known size of the disk
5626       will not be used during the disk activation, useful for cases
5627       when the size is wrong
5628   @return: False if the operation failed, otherwise a list of
5629       (host, instance_visible_name, node_visible_name)
5630       with the mapping from node devices to instance devices
5631
5632   """
5633   device_info = []
5634   disks_ok = True
5635   iname = instance.name
5636   disks = _ExpandCheckDisks(instance, disks)
5637
5638   # With the two passes mechanism we try to reduce the window of
5639   # opportunity for the race condition of switching DRBD to primary
5640   # before handshaking occured, but we do not eliminate it
5641
5642   # The proper fix would be to wait (with some limits) until the
5643   # connection has been made and drbd transitions from WFConnection
5644   # into any other network-connected state (Connected, SyncTarget,
5645   # SyncSource, etc.)
5646
5647   # 1st pass, assemble on all nodes in secondary mode
5648   for idx, inst_disk in enumerate(disks):
5649     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5650       if ignore_size:
5651         node_disk = node_disk.Copy()
5652         node_disk.UnsetSize()
5653       lu.cfg.SetDiskID(node_disk, node)
5654       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5655       msg = result.fail_msg
5656       if msg:
5657         lu.proc.LogWarning("Could not prepare block device %s on node %s"
5658                            " (is_primary=False, pass=1): %s",
5659                            inst_disk.iv_name, node, msg)
5660         if not ignore_secondaries:
5661           disks_ok = False
5662
5663   # FIXME: race condition on drbd migration to primary
5664
5665   # 2nd pass, do only the primary node
5666   for idx, inst_disk in enumerate(disks):
5667     dev_path = None
5668
5669     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5670       if node != instance.primary_node:
5671         continue
5672       if ignore_size:
5673         node_disk = node_disk.Copy()
5674         node_disk.UnsetSize()
5675       lu.cfg.SetDiskID(node_disk, node)
5676       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5677       msg = result.fail_msg
5678       if msg:
5679         lu.proc.LogWarning("Could not prepare block device %s on node %s"
5680                            " (is_primary=True, pass=2): %s",
5681                            inst_disk.iv_name, node, msg)
5682         disks_ok = False
5683       else:
5684         dev_path = result.payload
5685
5686     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5687
5688   # leave the disks configured for the primary node
5689   # this is a workaround that would be fixed better by
5690   # improving the logical/physical id handling
5691   for disk in disks:
5692     lu.cfg.SetDiskID(disk, instance.primary_node)
5693
5694   return disks_ok, device_info
5695
5696
5697 def _StartInstanceDisks(lu, instance, force):
5698   """Start the disks of an instance.
5699
5700   """
5701   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5702                                            ignore_secondaries=force)
5703   if not disks_ok:
5704     _ShutdownInstanceDisks(lu, instance)
5705     if force is not None and not force:
5706       lu.proc.LogWarning("", hint="If the message above refers to a"
5707                          " secondary node,"
5708                          " you can retry the operation using '--force'.")
5709     raise errors.OpExecError("Disk consistency error")
5710
5711
5712 class LUInstanceDeactivateDisks(NoHooksLU):
5713   """Shutdown an instance's disks.
5714
5715   """
5716   REQ_BGL = False
5717
5718   def ExpandNames(self):
5719     self._ExpandAndLockInstance()
5720     self.needed_locks[locking.LEVEL_NODE] = []
5721     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5722
5723   def DeclareLocks(self, level):
5724     if level == locking.LEVEL_NODE:
5725       self._LockInstancesNodes()
5726
5727   def CheckPrereq(self):
5728     """Check prerequisites.
5729
5730     This checks that the instance is in the cluster.
5731
5732     """
5733     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5734     assert self.instance is not None, \
5735       "Cannot retrieve locked instance %s" % self.op.instance_name
5736
5737   def Exec(self, feedback_fn):
5738     """Deactivate the disks
5739
5740     """
5741     instance = self.instance
5742     if self.op.force:
5743       _ShutdownInstanceDisks(self, instance)
5744     else:
5745       _SafeShutdownInstanceDisks(self, instance)
5746
5747
5748 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5749   """Shutdown block devices of an instance.
5750
5751   This function checks if an instance is running, before calling
5752   _ShutdownInstanceDisks.
5753
5754   """
5755   _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5756   _ShutdownInstanceDisks(lu, instance, disks=disks)
5757
5758
5759 def _ExpandCheckDisks(instance, disks):
5760   """Return the instance disks selected by the disks list
5761
5762   @type disks: list of L{objects.Disk} or None
5763   @param disks: selected disks
5764   @rtype: list of L{objects.Disk}
5765   @return: selected instance disks to act on
5766
5767   """
5768   if disks is None:
5769     return instance.disks
5770   else:
5771     if not set(disks).issubset(instance.disks):
5772       raise errors.ProgrammerError("Can only act on disks belonging to the"
5773                                    " target instance")
5774     return disks
5775
5776
5777 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5778   """Shutdown block devices of an instance.
5779
5780   This does the shutdown on all nodes of the instance.
5781
5782   If the ignore_primary is false, errors on the primary node are
5783   ignored.
5784
5785   """
5786   all_result = True
5787   disks = _ExpandCheckDisks(instance, disks)
5788
5789   for disk in disks:
5790     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5791       lu.cfg.SetDiskID(top_disk, node)
5792       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5793       msg = result.fail_msg
5794       if msg:
5795         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5796                       disk.iv_name, node, msg)
5797         if ((node == instance.primary_node and not ignore_primary) or
5798             (node != instance.primary_node and not result.offline)):
5799           all_result = False
5800   return all_result
5801
5802
5803 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5804   """Checks if a node has enough free memory.
5805
5806   This function check if a given node has the needed amount of free
5807   memory. In case the node has less memory or we cannot get the
5808   information from the node, this function raise an OpPrereqError
5809   exception.
5810
5811   @type lu: C{LogicalUnit}
5812   @param lu: a logical unit from which we get configuration data
5813   @type node: C{str}
5814   @param node: the node to check
5815   @type reason: C{str}
5816   @param reason: string to use in the error message
5817   @type requested: C{int}
5818   @param requested: the amount of memory in MiB to check for
5819   @type hypervisor_name: C{str}
5820   @param hypervisor_name: the hypervisor to ask for memory stats
5821   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5822       we cannot check the node
5823
5824   """
5825   nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5826   nodeinfo[node].Raise("Can't get data from node %s" % node,
5827                        prereq=True, ecode=errors.ECODE_ENVIRON)
5828   free_mem = nodeinfo[node].payload.get("memory_free", None)
5829   if not isinstance(free_mem, int):
5830     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5831                                " was '%s'" % (node, free_mem),
5832                                errors.ECODE_ENVIRON)
5833   if requested > free_mem:
5834     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5835                                " needed %s MiB, available %s MiB" %
5836                                (node, reason, requested, free_mem),
5837                                errors.ECODE_NORES)
5838
5839
5840 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5841   """Checks if nodes have enough free disk space in the all VGs.
5842
5843   This function check if all given nodes have the needed amount of
5844   free disk. In case any node has less disk or we cannot get the
5845   information from the node, this function raise an OpPrereqError
5846   exception.
5847
5848   @type lu: C{LogicalUnit}
5849   @param lu: a logical unit from which we get configuration data
5850   @type nodenames: C{list}
5851   @param nodenames: the list of node names to check
5852   @type req_sizes: C{dict}
5853   @param req_sizes: the hash of vg and corresponding amount of disk in
5854       MiB to check for
5855   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5856       or we cannot check the node
5857
5858   """
5859   for vg, req_size in req_sizes.items():
5860     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5861
5862
5863 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5864   """Checks if nodes have enough free disk space in the specified VG.
5865
5866   This function check if all given nodes have the needed amount of
5867   free disk. In case any node has less disk or we cannot get the
5868   information from the node, this function raise an OpPrereqError
5869   exception.
5870
5871   @type lu: C{LogicalUnit}
5872   @param lu: a logical unit from which we get configuration data
5873   @type nodenames: C{list}
5874   @param nodenames: the list of node names to check
5875   @type vg: C{str}
5876   @param vg: the volume group to check
5877   @type requested: C{int}
5878   @param requested: the amount of disk in MiB to check for
5879   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5880       or we cannot check the node
5881
5882   """
5883   nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5884   for node in nodenames:
5885     info = nodeinfo[node]
5886     info.Raise("Cannot get current information from node %s" % node,
5887                prereq=True, ecode=errors.ECODE_ENVIRON)
5888     vg_free = info.payload.get("vg_free", None)
5889     if not isinstance(vg_free, int):
5890       raise errors.OpPrereqError("Can't compute free disk space on node"
5891                                  " %s for vg %s, result was '%s'" %
5892                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
5893     if requested > vg_free:
5894       raise errors.OpPrereqError("Not enough disk space on target node %s"
5895                                  " vg %s: required %d MiB, available %d MiB" %
5896                                  (node, vg, requested, vg_free),
5897                                  errors.ECODE_NORES)
5898
5899
5900 class LUInstanceStartup(LogicalUnit):
5901   """Starts an instance.
5902
5903   """
5904   HPATH = "instance-start"
5905   HTYPE = constants.HTYPE_INSTANCE
5906   REQ_BGL = False
5907
5908   def CheckArguments(self):
5909     # extra beparams
5910     if self.op.beparams:
5911       # fill the beparams dict
5912       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5913
5914   def ExpandNames(self):
5915     self._ExpandAndLockInstance()
5916
5917   def BuildHooksEnv(self):
5918     """Build hooks env.
5919
5920     This runs on master, primary and secondary nodes of the instance.
5921
5922     """
5923     env = {
5924       "FORCE": self.op.force,
5925       }
5926
5927     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5928
5929     return env
5930
5931   def BuildHooksNodes(self):
5932     """Build hooks nodes.
5933
5934     """
5935     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5936     return (nl, nl)
5937
5938   def CheckPrereq(self):
5939     """Check prerequisites.
5940
5941     This checks that the instance is in the cluster.
5942
5943     """
5944     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5945     assert self.instance is not None, \
5946       "Cannot retrieve locked instance %s" % self.op.instance_name
5947
5948     # extra hvparams
5949     if self.op.hvparams:
5950       # check hypervisor parameter syntax (locally)
5951       cluster = self.cfg.GetClusterInfo()
5952       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5953       filled_hvp = cluster.FillHV(instance)
5954       filled_hvp.update(self.op.hvparams)
5955       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5956       hv_type.CheckParameterSyntax(filled_hvp)
5957       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5958
5959     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5960
5961     if self.primary_offline and self.op.ignore_offline_nodes:
5962       self.proc.LogWarning("Ignoring offline primary node")
5963
5964       if self.op.hvparams or self.op.beparams:
5965         self.proc.LogWarning("Overridden parameters are ignored")
5966     else:
5967       _CheckNodeOnline(self, instance.primary_node)
5968
5969       bep = self.cfg.GetClusterInfo().FillBE(instance)
5970
5971       # check bridges existence
5972       _CheckInstanceBridgesExist(self, instance)
5973
5974       remote_info = self.rpc.call_instance_info(instance.primary_node,
5975                                                 instance.name,
5976                                                 instance.hypervisor)
5977       remote_info.Raise("Error checking node %s" % instance.primary_node,
5978                         prereq=True, ecode=errors.ECODE_ENVIRON)
5979       if not remote_info.payload: # not running already
5980         _CheckNodeFreeMemory(self, instance.primary_node,
5981                              "starting instance %s" % instance.name,
5982                              bep[constants.BE_MEMORY], instance.hypervisor)
5983
5984   def Exec(self, feedback_fn):
5985     """Start the instance.
5986
5987     """
5988     instance = self.instance
5989     force = self.op.force
5990
5991     if not self.op.no_remember:
5992       self.cfg.MarkInstanceUp(instance.name)
5993
5994     if self.primary_offline:
5995       assert self.op.ignore_offline_nodes
5996       self.proc.LogInfo("Primary node offline, marked instance as started")
5997     else:
5998       node_current = instance.primary_node
5999
6000       _StartInstanceDisks(self, instance, force)
6001
6002       result = self.rpc.call_instance_start(node_current, instance,
6003                                             self.op.hvparams, self.op.beparams,
6004                                             self.op.startup_paused)
6005       msg = result.fail_msg
6006       if msg:
6007         _ShutdownInstanceDisks(self, instance)
6008         raise errors.OpExecError("Could not start instance: %s" % msg)
6009
6010
6011 class LUInstanceReboot(LogicalUnit):
6012   """Reboot an instance.
6013
6014   """
6015   HPATH = "instance-reboot"
6016   HTYPE = constants.HTYPE_INSTANCE
6017   REQ_BGL = False
6018
6019   def ExpandNames(self):
6020     self._ExpandAndLockInstance()
6021
6022   def BuildHooksEnv(self):
6023     """Build hooks env.
6024
6025     This runs on master, primary and secondary nodes of the instance.
6026
6027     """
6028     env = {
6029       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6030       "REBOOT_TYPE": self.op.reboot_type,
6031       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6032       }
6033
6034     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6035
6036     return env
6037
6038   def BuildHooksNodes(self):
6039     """Build hooks nodes.
6040
6041     """
6042     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6043     return (nl, nl)
6044
6045   def CheckPrereq(self):
6046     """Check prerequisites.
6047
6048     This checks that the instance is in the cluster.
6049
6050     """
6051     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6052     assert self.instance is not None, \
6053       "Cannot retrieve locked instance %s" % self.op.instance_name
6054
6055     _CheckNodeOnline(self, instance.primary_node)
6056
6057     # check bridges existence
6058     _CheckInstanceBridgesExist(self, instance)
6059
6060   def Exec(self, feedback_fn):
6061     """Reboot the instance.
6062
6063     """
6064     instance = self.instance
6065     ignore_secondaries = self.op.ignore_secondaries
6066     reboot_type = self.op.reboot_type
6067
6068     remote_info = self.rpc.call_instance_info(instance.primary_node,
6069                                               instance.name,
6070                                               instance.hypervisor)
6071     remote_info.Raise("Error checking node %s" % instance.primary_node)
6072     instance_running = bool(remote_info.payload)
6073
6074     node_current = instance.primary_node
6075
6076     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6077                                             constants.INSTANCE_REBOOT_HARD]:
6078       for disk in instance.disks:
6079         self.cfg.SetDiskID(disk, node_current)
6080       result = self.rpc.call_instance_reboot(node_current, instance,
6081                                              reboot_type,
6082                                              self.op.shutdown_timeout)
6083       result.Raise("Could not reboot instance")
6084     else:
6085       if instance_running:
6086         result = self.rpc.call_instance_shutdown(node_current, instance,
6087                                                  self.op.shutdown_timeout)
6088         result.Raise("Could not shutdown instance for full reboot")
6089         _ShutdownInstanceDisks(self, instance)
6090       else:
6091         self.LogInfo("Instance %s was already stopped, starting now",
6092                      instance.name)
6093       _StartInstanceDisks(self, instance, ignore_secondaries)
6094       result = self.rpc.call_instance_start(node_current, instance,
6095                                             None, None, False)
6096       msg = result.fail_msg
6097       if msg:
6098         _ShutdownInstanceDisks(self, instance)
6099         raise errors.OpExecError("Could not start instance for"
6100                                  " full reboot: %s" % msg)
6101
6102     self.cfg.MarkInstanceUp(instance.name)
6103
6104
6105 class LUInstanceShutdown(LogicalUnit):
6106   """Shutdown an instance.
6107
6108   """
6109   HPATH = "instance-stop"
6110   HTYPE = constants.HTYPE_INSTANCE
6111   REQ_BGL = False
6112
6113   def ExpandNames(self):
6114     self._ExpandAndLockInstance()
6115
6116   def BuildHooksEnv(self):
6117     """Build hooks env.
6118
6119     This runs on master, primary and secondary nodes of the instance.
6120
6121     """
6122     env = _BuildInstanceHookEnvByObject(self, self.instance)
6123     env["TIMEOUT"] = self.op.timeout
6124     return env
6125
6126   def BuildHooksNodes(self):
6127     """Build hooks nodes.
6128
6129     """
6130     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6131     return (nl, nl)
6132
6133   def CheckPrereq(self):
6134     """Check prerequisites.
6135
6136     This checks that the instance is in the cluster.
6137
6138     """
6139     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6140     assert self.instance is not None, \
6141       "Cannot retrieve locked instance %s" % self.op.instance_name
6142
6143     self.primary_offline = \
6144       self.cfg.GetNodeInfo(self.instance.primary_node).offline
6145
6146     if self.primary_offline and self.op.ignore_offline_nodes:
6147       self.proc.LogWarning("Ignoring offline primary node")
6148     else:
6149       _CheckNodeOnline(self, self.instance.primary_node)
6150
6151   def Exec(self, feedback_fn):
6152     """Shutdown the instance.
6153
6154     """
6155     instance = self.instance
6156     node_current = instance.primary_node
6157     timeout = self.op.timeout
6158
6159     if not self.op.no_remember:
6160       self.cfg.MarkInstanceDown(instance.name)
6161
6162     if self.primary_offline:
6163       assert self.op.ignore_offline_nodes
6164       self.proc.LogInfo("Primary node offline, marked instance as stopped")
6165     else:
6166       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6167       msg = result.fail_msg
6168       if msg:
6169         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6170
6171       _ShutdownInstanceDisks(self, instance)
6172
6173
6174 class LUInstanceReinstall(LogicalUnit):
6175   """Reinstall an instance.
6176
6177   """
6178   HPATH = "instance-reinstall"
6179   HTYPE = constants.HTYPE_INSTANCE
6180   REQ_BGL = False
6181
6182   def ExpandNames(self):
6183     self._ExpandAndLockInstance()
6184
6185   def BuildHooksEnv(self):
6186     """Build hooks env.
6187
6188     This runs on master, primary and secondary nodes of the instance.
6189
6190     """
6191     return _BuildInstanceHookEnvByObject(self, self.instance)
6192
6193   def BuildHooksNodes(self):
6194     """Build hooks nodes.
6195
6196     """
6197     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6198     return (nl, nl)
6199
6200   def CheckPrereq(self):
6201     """Check prerequisites.
6202
6203     This checks that the instance is in the cluster and is not running.
6204
6205     """
6206     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6207     assert instance is not None, \
6208       "Cannot retrieve locked instance %s" % self.op.instance_name
6209     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6210                      " offline, cannot reinstall")
6211     for node in instance.secondary_nodes:
6212       _CheckNodeOnline(self, node, "Instance secondary node offline,"
6213                        " cannot reinstall")
6214
6215     if instance.disk_template == constants.DT_DISKLESS:
6216       raise errors.OpPrereqError("Instance '%s' has no disks" %
6217                                  self.op.instance_name,
6218                                  errors.ECODE_INVAL)
6219     _CheckInstanceDown(self, instance, "cannot reinstall")
6220
6221     if self.op.os_type is not None:
6222       # OS verification
6223       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6224       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6225       instance_os = self.op.os_type
6226     else:
6227       instance_os = instance.os
6228
6229     nodelist = list(instance.all_nodes)
6230
6231     if self.op.osparams:
6232       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6233       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6234       self.os_inst = i_osdict # the new dict (without defaults)
6235     else:
6236       self.os_inst = None
6237
6238     self.instance = instance
6239
6240   def Exec(self, feedback_fn):
6241     """Reinstall the instance.
6242
6243     """
6244     inst = self.instance
6245
6246     if self.op.os_type is not None:
6247       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6248       inst.os = self.op.os_type
6249       # Write to configuration
6250       self.cfg.Update(inst, feedback_fn)
6251
6252     _StartInstanceDisks(self, inst, None)
6253     try:
6254       feedback_fn("Running the instance OS create scripts...")
6255       # FIXME: pass debug option from opcode to backend
6256       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6257                                              self.op.debug_level,
6258                                              osparams=self.os_inst)
6259       result.Raise("Could not install OS for instance %s on node %s" %
6260                    (inst.name, inst.primary_node))
6261     finally:
6262       _ShutdownInstanceDisks(self, inst)
6263
6264
6265 class LUInstanceRecreateDisks(LogicalUnit):
6266   """Recreate an instance's missing disks.
6267
6268   """
6269   HPATH = "instance-recreate-disks"
6270   HTYPE = constants.HTYPE_INSTANCE
6271   REQ_BGL = False
6272
6273   def CheckArguments(self):
6274     # normalise the disk list
6275     self.op.disks = sorted(frozenset(self.op.disks))
6276
6277   def ExpandNames(self):
6278     self._ExpandAndLockInstance()
6279     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6280     if self.op.nodes:
6281       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6282       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6283     else:
6284       self.needed_locks[locking.LEVEL_NODE] = []
6285
6286   def DeclareLocks(self, level):
6287     if level == locking.LEVEL_NODE:
6288       # if we replace the nodes, we only need to lock the old primary,
6289       # otherwise we need to lock all nodes for disk re-creation
6290       primary_only = bool(self.op.nodes)
6291       self._LockInstancesNodes(primary_only=primary_only)
6292
6293   def BuildHooksEnv(self):
6294     """Build hooks env.
6295
6296     This runs on master, primary and secondary nodes of the instance.
6297
6298     """
6299     return _BuildInstanceHookEnvByObject(self, self.instance)
6300
6301   def BuildHooksNodes(self):
6302     """Build hooks nodes.
6303
6304     """
6305     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6306     return (nl, nl)
6307
6308   def CheckPrereq(self):
6309     """Check prerequisites.
6310
6311     This checks that the instance is in the cluster and is not running.
6312
6313     """
6314     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6315     assert instance is not None, \
6316       "Cannot retrieve locked instance %s" % self.op.instance_name
6317     if self.op.nodes:
6318       if len(self.op.nodes) != len(instance.all_nodes):
6319         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6320                                    " %d replacement nodes were specified" %
6321                                    (instance.name, len(instance.all_nodes),
6322                                     len(self.op.nodes)),
6323                                    errors.ECODE_INVAL)
6324       assert instance.disk_template != constants.DT_DRBD8 or \
6325           len(self.op.nodes) == 2
6326       assert instance.disk_template != constants.DT_PLAIN or \
6327           len(self.op.nodes) == 1
6328       primary_node = self.op.nodes[0]
6329     else:
6330       primary_node = instance.primary_node
6331     _CheckNodeOnline(self, primary_node)
6332
6333     if instance.disk_template == constants.DT_DISKLESS:
6334       raise errors.OpPrereqError("Instance '%s' has no disks" %
6335                                  self.op.instance_name, errors.ECODE_INVAL)
6336     # if we replace nodes *and* the old primary is offline, we don't
6337     # check
6338     assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6339     old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6340     if not (self.op.nodes and old_pnode.offline):
6341       _CheckInstanceDown(self, instance, "cannot recreate disks")
6342
6343     if not self.op.disks:
6344       self.op.disks = range(len(instance.disks))
6345     else:
6346       for idx in self.op.disks:
6347         if idx >= len(instance.disks):
6348           raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6349                                      errors.ECODE_INVAL)
6350     if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6351       raise errors.OpPrereqError("Can't recreate disks partially and"
6352                                  " change the nodes at the same time",
6353                                  errors.ECODE_INVAL)
6354     self.instance = instance
6355
6356   def Exec(self, feedback_fn):
6357     """Recreate the disks.
6358
6359     """
6360     instance = self.instance
6361
6362     to_skip = []
6363     mods = [] # keeps track of needed logical_id changes
6364
6365     for idx, disk in enumerate(instance.disks):
6366       if idx not in self.op.disks: # disk idx has not been passed in
6367         to_skip.append(idx)
6368         continue
6369       # update secondaries for disks, if needed
6370       if self.op.nodes:
6371         if disk.dev_type == constants.LD_DRBD8:
6372           # need to update the nodes and minors
6373           assert len(self.op.nodes) == 2
6374           assert len(disk.logical_id) == 6 # otherwise disk internals
6375                                            # have changed
6376           (_, _, old_port, _, _, old_secret) = disk.logical_id
6377           new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6378           new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6379                     new_minors[0], new_minors[1], old_secret)
6380           assert len(disk.logical_id) == len(new_id)
6381           mods.append((idx, new_id))
6382
6383     # now that we have passed all asserts above, we can apply the mods
6384     # in a single run (to avoid partial changes)
6385     for idx, new_id in mods:
6386       instance.disks[idx].logical_id = new_id
6387
6388     # change primary node, if needed
6389     if self.op.nodes:
6390       instance.primary_node = self.op.nodes[0]
6391       self.LogWarning("Changing the instance's nodes, you will have to"
6392                       " remove any disks left on the older nodes manually")
6393
6394     if self.op.nodes:
6395       self.cfg.Update(instance, feedback_fn)
6396
6397     _CreateDisks(self, instance, to_skip=to_skip)
6398
6399
6400 class LUInstanceRename(LogicalUnit):
6401   """Rename an instance.
6402
6403   """
6404   HPATH = "instance-rename"
6405   HTYPE = constants.HTYPE_INSTANCE
6406
6407   def CheckArguments(self):
6408     """Check arguments.
6409
6410     """
6411     if self.op.ip_check and not self.op.name_check:
6412       # TODO: make the ip check more flexible and not depend on the name check
6413       raise errors.OpPrereqError("IP address check requires a name check",
6414                                  errors.ECODE_INVAL)
6415
6416   def BuildHooksEnv(self):
6417     """Build hooks env.
6418
6419     This runs on master, primary and secondary nodes of the instance.
6420
6421     """
6422     env = _BuildInstanceHookEnvByObject(self, self.instance)
6423     env["INSTANCE_NEW_NAME"] = self.op.new_name
6424     return env
6425
6426   def BuildHooksNodes(self):
6427     """Build hooks nodes.
6428
6429     """
6430     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6431     return (nl, nl)
6432
6433   def CheckPrereq(self):
6434     """Check prerequisites.
6435
6436     This checks that the instance is in the cluster and is not running.
6437
6438     """
6439     self.op.instance_name = _ExpandInstanceName(self.cfg,
6440                                                 self.op.instance_name)
6441     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6442     assert instance is not None
6443     _CheckNodeOnline(self, instance.primary_node)
6444     _CheckInstanceDown(self, instance, "cannot rename")
6445     self.instance = instance
6446
6447     new_name = self.op.new_name
6448     if self.op.name_check:
6449       hostname = netutils.GetHostname(name=new_name)
6450       if hostname.name != new_name:
6451         self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6452                      hostname.name)
6453       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6454         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6455                                     " same as given hostname '%s'") %
6456                                     (hostname.name, self.op.new_name),
6457                                     errors.ECODE_INVAL)
6458       new_name = self.op.new_name = hostname.name
6459       if (self.op.ip_check and
6460           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6461         raise errors.OpPrereqError("IP %s of instance %s already in use" %
6462                                    (hostname.ip, new_name),
6463                                    errors.ECODE_NOTUNIQUE)
6464
6465     instance_list = self.cfg.GetInstanceList()
6466     if new_name in instance_list and new_name != instance.name:
6467       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6468                                  new_name, errors.ECODE_EXISTS)
6469
6470   def Exec(self, feedback_fn):
6471     """Rename the instance.
6472
6473     """
6474     inst = self.instance
6475     old_name = inst.name
6476
6477     rename_file_storage = False
6478     if (inst.disk_template in constants.DTS_FILEBASED and
6479         self.op.new_name != inst.name):
6480       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6481       rename_file_storage = True
6482
6483     self.cfg.RenameInstance(inst.name, self.op.new_name)
6484     # Change the instance lock. This is definitely safe while we hold the BGL.
6485     # Otherwise the new lock would have to be added in acquired mode.
6486     assert self.REQ_BGL
6487     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6488     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6489
6490     # re-read the instance from the configuration after rename
6491     inst = self.cfg.GetInstanceInfo(self.op.new_name)
6492
6493     if rename_file_storage:
6494       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6495       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6496                                                      old_file_storage_dir,
6497                                                      new_file_storage_dir)
6498       result.Raise("Could not rename on node %s directory '%s' to '%s'"
6499                    " (but the instance has been renamed in Ganeti)" %
6500                    (inst.primary_node, old_file_storage_dir,
6501                     new_file_storage_dir))
6502
6503     _StartInstanceDisks(self, inst, None)
6504     try:
6505       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6506                                                  old_name, self.op.debug_level)
6507       msg = result.fail_msg
6508       if msg:
6509         msg = ("Could not run OS rename script for instance %s on node %s"
6510                " (but the instance has been renamed in Ganeti): %s" %
6511                (inst.name, inst.primary_node, msg))
6512         self.proc.LogWarning(msg)
6513     finally:
6514       _ShutdownInstanceDisks(self, inst)
6515
6516     return inst.name
6517
6518
6519 class LUInstanceRemove(LogicalUnit):
6520   """Remove an instance.
6521
6522   """
6523   HPATH = "instance-remove"
6524   HTYPE = constants.HTYPE_INSTANCE
6525   REQ_BGL = False
6526
6527   def ExpandNames(self):
6528     self._ExpandAndLockInstance()
6529     self.needed_locks[locking.LEVEL_NODE] = []
6530     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6531
6532   def DeclareLocks(self, level):
6533     if level == locking.LEVEL_NODE:
6534       self._LockInstancesNodes()
6535
6536   def BuildHooksEnv(self):
6537     """Build hooks env.
6538
6539     This runs on master, primary and secondary nodes of the instance.
6540
6541     """
6542     env = _BuildInstanceHookEnvByObject(self, self.instance)
6543     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6544     return env
6545
6546   def BuildHooksNodes(self):
6547     """Build hooks nodes.
6548
6549     """
6550     nl = [self.cfg.GetMasterNode()]
6551     nl_post = list(self.instance.all_nodes) + nl
6552     return (nl, nl_post)
6553
6554   def CheckPrereq(self):
6555     """Check prerequisites.
6556
6557     This checks that the instance is in the cluster.
6558
6559     """
6560     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6561     assert self.instance is not None, \
6562       "Cannot retrieve locked instance %s" % self.op.instance_name
6563
6564   def Exec(self, feedback_fn):
6565     """Remove the instance.
6566
6567     """
6568     instance = self.instance
6569     logging.info("Shutting down instance %s on node %s",
6570                  instance.name, instance.primary_node)
6571
6572     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6573                                              self.op.shutdown_timeout)
6574     msg = result.fail_msg
6575     if msg:
6576       if self.op.ignore_failures:
6577         feedback_fn("Warning: can't shutdown instance: %s" % msg)
6578       else:
6579         raise errors.OpExecError("Could not shutdown instance %s on"
6580                                  " node %s: %s" %
6581                                  (instance.name, instance.primary_node, msg))
6582
6583     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6584
6585
6586 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6587   """Utility function to remove an instance.
6588
6589   """
6590   logging.info("Removing block devices for instance %s", instance.name)
6591
6592   if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
6593     if not ignore_failures:
6594       raise errors.OpExecError("Can't remove instance's disks")
6595     feedback_fn("Warning: can't remove instance's disks")
6596
6597   logging.info("Removing instance %s out of cluster config", instance.name)
6598
6599   lu.cfg.RemoveInstance(instance.name)
6600
6601   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6602     "Instance lock removal conflict"
6603
6604   # Remove lock for the instance
6605   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6606
6607
6608 class LUInstanceQuery(NoHooksLU):
6609   """Logical unit for querying instances.
6610
6611   """
6612   # pylint: disable=W0142
6613   REQ_BGL = False
6614
6615   def CheckArguments(self):
6616     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6617                              self.op.output_fields, self.op.use_locking)
6618
6619   def ExpandNames(self):
6620     self.iq.ExpandNames(self)
6621
6622   def DeclareLocks(self, level):
6623     self.iq.DeclareLocks(self, level)
6624
6625   def Exec(self, feedback_fn):
6626     return self.iq.OldStyleQuery(self)
6627
6628
6629 class LUInstanceFailover(LogicalUnit):
6630   """Failover an instance.
6631
6632   """
6633   HPATH = "instance-failover"
6634   HTYPE = constants.HTYPE_INSTANCE
6635   REQ_BGL = False
6636
6637   def CheckArguments(self):
6638     """Check the arguments.
6639
6640     """
6641     self.iallocator = getattr(self.op, "iallocator", None)
6642     self.target_node = getattr(self.op, "target_node", None)
6643
6644   def ExpandNames(self):
6645     self._ExpandAndLockInstance()
6646
6647     if self.op.target_node is not None:
6648       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6649
6650     self.needed_locks[locking.LEVEL_NODE] = []
6651     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6652
6653     ignore_consistency = self.op.ignore_consistency
6654     shutdown_timeout = self.op.shutdown_timeout
6655     self._migrater = TLMigrateInstance(self, self.op.instance_name,
6656                                        cleanup=False,
6657                                        failover=True,
6658                                        ignore_consistency=ignore_consistency,
6659                                        shutdown_timeout=shutdown_timeout)
6660     self.tasklets = [self._migrater]
6661
6662   def DeclareLocks(self, level):
6663     if level == locking.LEVEL_NODE:
6664       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6665       if instance.disk_template in constants.DTS_EXT_MIRROR:
6666         if self.op.target_node is None:
6667           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6668         else:
6669           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6670                                                    self.op.target_node]
6671         del self.recalculate_locks[locking.LEVEL_NODE]
6672       else:
6673         self._LockInstancesNodes()
6674
6675   def BuildHooksEnv(self):
6676     """Build hooks env.
6677
6678     This runs on master, primary and secondary nodes of the instance.
6679
6680     """
6681     instance = self._migrater.instance
6682     source_node = instance.primary_node
6683     target_node = self.op.target_node
6684     env = {
6685       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6686       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6687       "OLD_PRIMARY": source_node,
6688       "NEW_PRIMARY": target_node,
6689       }
6690
6691     if instance.disk_template in constants.DTS_INT_MIRROR:
6692       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6693       env["NEW_SECONDARY"] = source_node
6694     else:
6695       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6696
6697     env.update(_BuildInstanceHookEnvByObject(self, instance))
6698
6699     return env
6700
6701   def BuildHooksNodes(self):
6702     """Build hooks nodes.
6703
6704     """
6705     instance = self._migrater.instance
6706     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6707     return (nl, nl + [instance.primary_node])
6708
6709
6710 class LUInstanceMigrate(LogicalUnit):
6711   """Migrate an instance.
6712
6713   This is migration without shutting down, compared to the failover,
6714   which is done with shutdown.
6715
6716   """
6717   HPATH = "instance-migrate"
6718   HTYPE = constants.HTYPE_INSTANCE
6719   REQ_BGL = False
6720
6721   def ExpandNames(self):
6722     self._ExpandAndLockInstance()
6723
6724     if self.op.target_node is not None:
6725       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6726
6727     self.needed_locks[locking.LEVEL_NODE] = []
6728     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6729
6730     self._migrater = TLMigrateInstance(self, self.op.instance_name,
6731                                        cleanup=self.op.cleanup,
6732                                        failover=False,
6733                                        fallback=self.op.allow_failover)
6734     self.tasklets = [self._migrater]
6735
6736   def DeclareLocks(self, level):
6737     if level == locking.LEVEL_NODE:
6738       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6739       if instance.disk_template in constants.DTS_EXT_MIRROR:
6740         if self.op.target_node is None:
6741           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6742         else:
6743           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6744                                                    self.op.target_node]
6745         del self.recalculate_locks[locking.LEVEL_NODE]
6746       else:
6747         self._LockInstancesNodes()
6748
6749   def BuildHooksEnv(self):
6750     """Build hooks env.
6751
6752     This runs on master, primary and secondary nodes of the instance.
6753
6754     """
6755     instance = self._migrater.instance
6756     source_node = instance.primary_node
6757     target_node = self.op.target_node
6758     env = _BuildInstanceHookEnvByObject(self, instance)
6759     env.update({
6760       "MIGRATE_LIVE": self._migrater.live,
6761       "MIGRATE_CLEANUP": self.op.cleanup,
6762       "OLD_PRIMARY": source_node,
6763       "NEW_PRIMARY": target_node,
6764       })
6765
6766     if instance.disk_template in constants.DTS_INT_MIRROR:
6767       env["OLD_SECONDARY"] = target_node
6768       env["NEW_SECONDARY"] = source_node
6769     else:
6770       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6771
6772     return env
6773
6774   def BuildHooksNodes(self):
6775     """Build hooks nodes.
6776
6777     """
6778     instance = self._migrater.instance
6779     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6780     return (nl, nl + [instance.primary_node])
6781
6782
6783 class LUInstanceMove(LogicalUnit):
6784   """Move an instance by data-copying.
6785
6786   """
6787   HPATH = "instance-move"
6788   HTYPE = constants.HTYPE_INSTANCE
6789   REQ_BGL = False
6790
6791   def ExpandNames(self):
6792     self._ExpandAndLockInstance()
6793     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6794     self.op.target_node = target_node
6795     self.needed_locks[locking.LEVEL_NODE] = [target_node]
6796     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6797
6798   def DeclareLocks(self, level):
6799     if level == locking.LEVEL_NODE:
6800       self._LockInstancesNodes(primary_only=True)
6801
6802   def BuildHooksEnv(self):
6803     """Build hooks env.
6804
6805     This runs on master, primary and secondary nodes of the instance.
6806
6807     """
6808     env = {
6809       "TARGET_NODE": self.op.target_node,
6810       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6811       }
6812     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6813     return env
6814
6815   def BuildHooksNodes(self):
6816     """Build hooks nodes.
6817
6818     """
6819     nl = [
6820       self.cfg.GetMasterNode(),
6821       self.instance.primary_node,
6822       self.op.target_node,
6823       ]
6824     return (nl, nl)
6825
6826   def CheckPrereq(self):
6827     """Check prerequisites.
6828
6829     This checks that the instance is in the cluster.
6830
6831     """
6832     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6833     assert self.instance is not None, \
6834       "Cannot retrieve locked instance %s" % self.op.instance_name
6835
6836     node = self.cfg.GetNodeInfo(self.op.target_node)
6837     assert node is not None, \
6838       "Cannot retrieve locked node %s" % self.op.target_node
6839
6840     self.target_node = target_node = node.name
6841
6842     if target_node == instance.primary_node:
6843       raise errors.OpPrereqError("Instance %s is already on the node %s" %
6844                                  (instance.name, target_node),
6845                                  errors.ECODE_STATE)
6846
6847     bep = self.cfg.GetClusterInfo().FillBE(instance)
6848
6849     for idx, dsk in enumerate(instance.disks):
6850       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6851         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6852                                    " cannot copy" % idx, errors.ECODE_STATE)
6853
6854     _CheckNodeOnline(self, target_node)
6855     _CheckNodeNotDrained(self, target_node)
6856     _CheckNodeVmCapable(self, target_node)
6857
6858     if instance.admin_up:
6859       # check memory requirements on the secondary node
6860       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6861                            instance.name, bep[constants.BE_MEMORY],
6862                            instance.hypervisor)
6863     else:
6864       self.LogInfo("Not checking memory on the secondary node as"
6865                    " instance will not be started")
6866
6867     # check bridge existance
6868     _CheckInstanceBridgesExist(self, instance, node=target_node)
6869
6870   def Exec(self, feedback_fn):
6871     """Move an instance.
6872
6873     The move is done by shutting it down on its present node, copying
6874     the data over (slow) and starting it on the new node.
6875
6876     """
6877     instance = self.instance
6878
6879     source_node = instance.primary_node
6880     target_node = self.target_node
6881
6882     self.LogInfo("Shutting down instance %s on source node %s",
6883                  instance.name, source_node)
6884
6885     result = self.rpc.call_instance_shutdown(source_node, instance,
6886                                              self.op.shutdown_timeout)
6887     msg = result.fail_msg
6888     if msg:
6889       if self.op.ignore_consistency:
6890         self.proc.LogWarning("Could not shutdown instance %s on node %s."
6891                              " Proceeding anyway. Please make sure node"
6892                              " %s is down. Error details: %s",
6893                              instance.name, source_node, source_node, msg)
6894       else:
6895         raise errors.OpExecError("Could not shutdown instance %s on"
6896                                  " node %s: %s" %
6897                                  (instance.name, source_node, msg))
6898
6899     # create the target disks
6900     try:
6901       _CreateDisks(self, instance, target_node=target_node)
6902     except errors.OpExecError:
6903       self.LogWarning("Device creation failed, reverting...")
6904       try:
6905         _RemoveDisks(self, instance, target_node=target_node)
6906       finally:
6907         self.cfg.ReleaseDRBDMinors(instance.name)
6908         raise
6909
6910     cluster_name = self.cfg.GetClusterInfo().cluster_name
6911
6912     errs = []
6913     # activate, get path, copy the data over
6914     for idx, disk in enumerate(instance.disks):
6915       self.LogInfo("Copying data for disk %d", idx)
6916       result = self.rpc.call_blockdev_assemble(target_node, disk,
6917                                                instance.name, True, idx)
6918       if result.fail_msg:
6919         self.LogWarning("Can't assemble newly created disk %d: %s",
6920                         idx, result.fail_msg)
6921         errs.append(result.fail_msg)
6922         break
6923       dev_path = result.payload
6924       result = self.rpc.call_blockdev_export(source_node, disk,
6925                                              target_node, dev_path,
6926                                              cluster_name)
6927       if result.fail_msg:
6928         self.LogWarning("Can't copy data over for disk %d: %s",
6929                         idx, result.fail_msg)
6930         errs.append(result.fail_msg)
6931         break
6932
6933     if errs:
6934       self.LogWarning("Some disks failed to copy, aborting")
6935       try:
6936         _RemoveDisks(self, instance, target_node=target_node)
6937       finally:
6938         self.cfg.ReleaseDRBDMinors(instance.name)
6939         raise errors.OpExecError("Errors during disk copy: %s" %
6940                                  (",".join(errs),))
6941
6942     instance.primary_node = target_node
6943     self.cfg.Update(instance, feedback_fn)
6944
6945     self.LogInfo("Removing the disks on the original node")
6946     _RemoveDisks(self, instance, target_node=source_node)
6947
6948     # Only start the instance if it's marked as up
6949     if instance.admin_up:
6950       self.LogInfo("Starting instance %s on node %s",
6951                    instance.name, target_node)
6952
6953       disks_ok, _ = _AssembleInstanceDisks(self, instance,
6954                                            ignore_secondaries=True)
6955       if not disks_ok:
6956         _ShutdownInstanceDisks(self, instance)
6957         raise errors.OpExecError("Can't activate the instance's disks")
6958
6959       result = self.rpc.call_instance_start(target_node, instance,
6960                                             None, None, False)
6961       msg = result.fail_msg
6962       if msg:
6963         _ShutdownInstanceDisks(self, instance)
6964         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6965                                  (instance.name, target_node, msg))
6966
6967
6968 class LUNodeMigrate(LogicalUnit):
6969   """Migrate all instances from a node.
6970
6971   """
6972   HPATH = "node-migrate"
6973   HTYPE = constants.HTYPE_NODE
6974   REQ_BGL = False
6975
6976   def CheckArguments(self):
6977     pass
6978
6979   def ExpandNames(self):
6980     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6981
6982     self.share_locks = _ShareAll()
6983     self.needed_locks = {
6984       locking.LEVEL_NODE: [self.op.node_name],
6985       }
6986
6987   def BuildHooksEnv(self):
6988     """Build hooks env.
6989
6990     This runs on the master, the primary and all the secondaries.
6991
6992     """
6993     return {
6994       "NODE_NAME": self.op.node_name,
6995       }
6996
6997   def BuildHooksNodes(self):
6998     """Build hooks nodes.
6999
7000     """
7001     nl = [self.cfg.GetMasterNode()]
7002     return (nl, nl)
7003
7004   def CheckPrereq(self):
7005     pass
7006
7007   def Exec(self, feedback_fn):
7008     # Prepare jobs for migration instances
7009     jobs = [
7010       [opcodes.OpInstanceMigrate(instance_name=inst.name,
7011                                  mode=self.op.mode,
7012                                  live=self.op.live,
7013                                  iallocator=self.op.iallocator,
7014                                  target_node=self.op.target_node)]
7015       for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7016       ]
7017
7018     # TODO: Run iallocator in this opcode and pass correct placement options to
7019     # OpInstanceMigrate. Since other jobs can modify the cluster between
7020     # running the iallocator and the actual migration, a good consistency model
7021     # will have to be found.
7022
7023     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7024             frozenset([self.op.node_name]))
7025
7026     return ResultWithJobs(jobs)
7027
7028
7029 class TLMigrateInstance(Tasklet):
7030   """Tasklet class for instance migration.
7031
7032   @type live: boolean
7033   @ivar live: whether the migration will be done live or non-live;
7034       this variable is initalized only after CheckPrereq has run
7035   @type cleanup: boolean
7036   @ivar cleanup: Wheater we cleanup from a failed migration
7037   @type iallocator: string
7038   @ivar iallocator: The iallocator used to determine target_node
7039   @type target_node: string
7040   @ivar target_node: If given, the target_node to reallocate the instance to
7041   @type failover: boolean
7042   @ivar failover: Whether operation results in failover or migration
7043   @type fallback: boolean
7044   @ivar fallback: Whether fallback to failover is allowed if migration not
7045                   possible
7046   @type ignore_consistency: boolean
7047   @ivar ignore_consistency: Wheter we should ignore consistency between source
7048                             and target node
7049   @type shutdown_timeout: int
7050   @ivar shutdown_timeout: In case of failover timeout of the shutdown
7051
7052   """
7053   def __init__(self, lu, instance_name, cleanup=False,
7054                failover=False, fallback=False,
7055                ignore_consistency=False,
7056                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7057     """Initializes this class.
7058
7059     """
7060     Tasklet.__init__(self, lu)
7061
7062     # Parameters
7063     self.instance_name = instance_name
7064     self.cleanup = cleanup
7065     self.live = False # will be overridden later
7066     self.failover = failover
7067     self.fallback = fallback
7068     self.ignore_consistency = ignore_consistency
7069     self.shutdown_timeout = shutdown_timeout
7070
7071   def CheckPrereq(self):
7072     """Check prerequisites.
7073
7074     This checks that the instance is in the cluster.
7075
7076     """
7077     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7078     instance = self.cfg.GetInstanceInfo(instance_name)
7079     assert instance is not None
7080     self.instance = instance
7081
7082     if (not self.cleanup and not instance.admin_up and not self.failover and
7083         self.fallback):
7084       self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7085                       " to failover")
7086       self.failover = True
7087
7088     if instance.disk_template not in constants.DTS_MIRRORED:
7089       if self.failover:
7090         text = "failovers"
7091       else:
7092         text = "migrations"
7093       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7094                                  " %s" % (instance.disk_template, text),
7095                                  errors.ECODE_STATE)
7096
7097     if instance.disk_template in constants.DTS_EXT_MIRROR:
7098       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7099
7100       if self.lu.op.iallocator:
7101         self._RunAllocator()
7102       else:
7103         # We set set self.target_node as it is required by
7104         # BuildHooksEnv
7105         self.target_node = self.lu.op.target_node
7106
7107       # self.target_node is already populated, either directly or by the
7108       # iallocator run
7109       target_node = self.target_node
7110       if self.target_node == instance.primary_node:
7111         raise errors.OpPrereqError("Cannot migrate instance %s"
7112                                    " to its primary (%s)" %
7113                                    (instance.name, instance.primary_node))
7114
7115       if len(self.lu.tasklets) == 1:
7116         # It is safe to release locks only when we're the only tasklet
7117         # in the LU
7118         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7119                       keep=[instance.primary_node, self.target_node])
7120
7121     else:
7122       secondary_nodes = instance.secondary_nodes
7123       if not secondary_nodes:
7124         raise errors.ConfigurationError("No secondary node but using"
7125                                         " %s disk template" %
7126                                         instance.disk_template)
7127       target_node = secondary_nodes[0]
7128       if self.lu.op.iallocator or (self.lu.op.target_node and
7129                                    self.lu.op.target_node != target_node):
7130         if self.failover:
7131           text = "failed over"
7132         else:
7133           text = "migrated"
7134         raise errors.OpPrereqError("Instances with disk template %s cannot"
7135                                    " be %s to arbitrary nodes"
7136                                    " (neither an iallocator nor a target"
7137                                    " node can be passed)" %
7138                                    (instance.disk_template, text),
7139                                    errors.ECODE_INVAL)
7140
7141     i_be = self.cfg.GetClusterInfo().FillBE(instance)
7142
7143     # check memory requirements on the secondary node
7144     if not self.cleanup and (not self.failover or instance.admin_up):
7145       _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7146                            instance.name, i_be[constants.BE_MEMORY],
7147                            instance.hypervisor)
7148     else:
7149       self.lu.LogInfo("Not checking memory on the secondary node as"
7150                       " instance will not be started")
7151
7152     # check bridge existance
7153     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7154
7155     if not self.cleanup:
7156       _CheckNodeNotDrained(self.lu, target_node)
7157       if not self.failover:
7158         result = self.rpc.call_instance_migratable(instance.primary_node,
7159                                                    instance)
7160         if result.fail_msg and self.fallback:
7161           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7162                           " failover")
7163           self.failover = True
7164         else:
7165           result.Raise("Can't migrate, please use failover",
7166                        prereq=True, ecode=errors.ECODE_STATE)
7167
7168     assert not (self.failover and self.cleanup)
7169
7170     if not self.failover:
7171       if self.lu.op.live is not None and self.lu.op.mode is not None:
7172         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7173                                    " parameters are accepted",
7174                                    errors.ECODE_INVAL)
7175       if self.lu.op.live is not None:
7176         if self.lu.op.live:
7177           self.lu.op.mode = constants.HT_MIGRATION_LIVE
7178         else:
7179           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7180         # reset the 'live' parameter to None so that repeated
7181         # invocations of CheckPrereq do not raise an exception
7182         self.lu.op.live = None
7183       elif self.lu.op.mode is None:
7184         # read the default value from the hypervisor
7185         i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7186                                                 skip_globals=False)
7187         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7188
7189       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7190     else:
7191       # Failover is never live
7192       self.live = False
7193
7194   def _RunAllocator(self):
7195     """Run the allocator based on input opcode.
7196
7197     """
7198     ial = IAllocator(self.cfg, self.rpc,
7199                      mode=constants.IALLOCATOR_MODE_RELOC,
7200                      name=self.instance_name,
7201                      # TODO See why hail breaks with a single node below
7202                      relocate_from=[self.instance.primary_node,
7203                                     self.instance.primary_node],
7204                      )
7205
7206     ial.Run(self.lu.op.iallocator)
7207
7208     if not ial.success:
7209       raise errors.OpPrereqError("Can't compute nodes using"
7210                                  " iallocator '%s': %s" %
7211                                  (self.lu.op.iallocator, ial.info),
7212                                  errors.ECODE_NORES)
7213     if len(ial.result) != ial.required_nodes:
7214       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7215                                  " of nodes (%s), required %s" %
7216                                  (self.lu.op.iallocator, len(ial.result),
7217                                   ial.required_nodes), errors.ECODE_FAULT)
7218     self.target_node = ial.result[0]
7219     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7220                  self.instance_name, self.lu.op.iallocator,
7221                  utils.CommaJoin(ial.result))
7222
7223   def _WaitUntilSync(self):
7224     """Poll with custom rpc for disk sync.
7225
7226     This uses our own step-based rpc call.
7227
7228     """
7229     self.feedback_fn("* wait until resync is done")
7230     all_done = False
7231     while not all_done:
7232       all_done = True
7233       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7234                                             self.nodes_ip,
7235                                             self.instance.disks)
7236       min_percent = 100
7237       for node, nres in result.items():
7238         nres.Raise("Cannot resync disks on node %s" % node)
7239         node_done, node_percent = nres.payload
7240         all_done = all_done and node_done
7241         if node_percent is not None:
7242           min_percent = min(min_percent, node_percent)
7243       if not all_done:
7244         if min_percent < 100:
7245           self.feedback_fn("   - progress: %.1f%%" % min_percent)
7246         time.sleep(2)
7247
7248   def _EnsureSecondary(self, node):
7249     """Demote a node to secondary.
7250
7251     """
7252     self.feedback_fn("* switching node %s to secondary mode" % node)
7253
7254     for dev in self.instance.disks:
7255       self.cfg.SetDiskID(dev, node)
7256
7257     result = self.rpc.call_blockdev_close(node, self.instance.name,
7258                                           self.instance.disks)
7259     result.Raise("Cannot change disk to secondary on node %s" % node)
7260
7261   def _GoStandalone(self):
7262     """Disconnect from the network.
7263
7264     """
7265     self.feedback_fn("* changing into standalone mode")
7266     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7267                                                self.instance.disks)
7268     for node, nres in result.items():
7269       nres.Raise("Cannot disconnect disks node %s" % node)
7270
7271   def _GoReconnect(self, multimaster):
7272     """Reconnect to the network.
7273
7274     """
7275     if multimaster:
7276       msg = "dual-master"
7277     else:
7278       msg = "single-master"
7279     self.feedback_fn("* changing disks into %s mode" % msg)
7280     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7281                                            self.instance.disks,
7282                                            self.instance.name, multimaster)
7283     for node, nres in result.items():
7284       nres.Raise("Cannot change disks config on node %s" % node)
7285
7286   def _ExecCleanup(self):
7287     """Try to cleanup after a failed migration.
7288
7289     The cleanup is done by:
7290       - check that the instance is running only on one node
7291         (and update the config if needed)
7292       - change disks on its secondary node to secondary
7293       - wait until disks are fully synchronized
7294       - disconnect from the network
7295       - change disks into single-master mode
7296       - wait again until disks are fully synchronized
7297
7298     """
7299     instance = self.instance
7300     target_node = self.target_node
7301     source_node = self.source_node
7302
7303     # check running on only one node
7304     self.feedback_fn("* checking where the instance actually runs"
7305                      " (if this hangs, the hypervisor might be in"
7306                      " a bad state)")
7307     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7308     for node, result in ins_l.items():
7309       result.Raise("Can't contact node %s" % node)
7310
7311     runningon_source = instance.name in ins_l[source_node].payload
7312     runningon_target = instance.name in ins_l[target_node].payload
7313
7314     if runningon_source and runningon_target:
7315       raise errors.OpExecError("Instance seems to be running on two nodes,"
7316                                " or the hypervisor is confused; you will have"
7317                                " to ensure manually that it runs only on one"
7318                                " and restart this operation")
7319
7320     if not (runningon_source or runningon_target):
7321       raise errors.OpExecError("Instance does not seem to be running at all;"
7322                                " in this case it's safer to repair by"
7323                                " running 'gnt-instance stop' to ensure disk"
7324                                " shutdown, and then restarting it")
7325
7326     if runningon_target:
7327       # the migration has actually succeeded, we need to update the config
7328       self.feedback_fn("* instance running on secondary node (%s),"
7329                        " updating config" % target_node)
7330       instance.primary_node = target_node
7331       self.cfg.Update(instance, self.feedback_fn)
7332       demoted_node = source_node
7333     else:
7334       self.feedback_fn("* instance confirmed to be running on its"
7335                        " primary node (%s)" % source_node)
7336       demoted_node = target_node
7337
7338     if instance.disk_template in constants.DTS_INT_MIRROR:
7339       self._EnsureSecondary(demoted_node)
7340       try:
7341         self._WaitUntilSync()
7342       except errors.OpExecError:
7343         # we ignore here errors, since if the device is standalone, it
7344         # won't be able to sync
7345         pass
7346       self._GoStandalone()
7347       self._GoReconnect(False)
7348       self._WaitUntilSync()
7349
7350     self.feedback_fn("* done")
7351
7352   def _RevertDiskStatus(self):
7353     """Try to revert the disk status after a failed migration.
7354
7355     """
7356     target_node = self.target_node
7357     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7358       return
7359
7360     try:
7361       self._EnsureSecondary(target_node)
7362       self._GoStandalone()
7363       self._GoReconnect(False)
7364       self._WaitUntilSync()
7365     except errors.OpExecError, err:
7366       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7367                          " please try to recover the instance manually;"
7368                          " error '%s'" % str(err))
7369
7370   def _AbortMigration(self):
7371     """Call the hypervisor code to abort a started migration.
7372
7373     """
7374     instance = self.instance
7375     target_node = self.target_node
7376     migration_info = self.migration_info
7377
7378     abort_result = self.rpc.call_finalize_migration(target_node,
7379                                                     instance,
7380                                                     migration_info,
7381                                                     False)
7382     abort_msg = abort_result.fail_msg
7383     if abort_msg:
7384       logging.error("Aborting migration failed on target node %s: %s",
7385                     target_node, abort_msg)
7386       # Don't raise an exception here, as we stil have to try to revert the
7387       # disk status, even if this step failed.
7388
7389   def _ExecMigration(self):
7390     """Migrate an instance.
7391
7392     The migrate is done by:
7393       - change the disks into dual-master mode
7394       - wait until disks are fully synchronized again
7395       - migrate the instance
7396       - change disks on the new secondary node (the old primary) to secondary
7397       - wait until disks are fully synchronized
7398       - change disks into single-master mode
7399
7400     """
7401     instance = self.instance
7402     target_node = self.target_node
7403     source_node = self.source_node
7404
7405     self.feedback_fn("* checking disk consistency between source and target")
7406     for dev in instance.disks:
7407       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7408         raise errors.OpExecError("Disk %s is degraded or not fully"
7409                                  " synchronized on target node,"
7410                                  " aborting migration" % dev.iv_name)
7411
7412     # First get the migration information from the remote node
7413     result = self.rpc.call_migration_info(source_node, instance)
7414     msg = result.fail_msg
7415     if msg:
7416       log_err = ("Failed fetching source migration information from %s: %s" %
7417                  (source_node, msg))
7418       logging.error(log_err)
7419       raise errors.OpExecError(log_err)
7420
7421     self.migration_info = migration_info = result.payload
7422
7423     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7424       # Then switch the disks to master/master mode
7425       self._EnsureSecondary(target_node)
7426       self._GoStandalone()
7427       self._GoReconnect(True)
7428       self._WaitUntilSync()
7429
7430     self.feedback_fn("* preparing %s to accept the instance" % target_node)
7431     result = self.rpc.call_accept_instance(target_node,
7432                                            instance,
7433                                            migration_info,
7434                                            self.nodes_ip[target_node])
7435
7436     msg = result.fail_msg
7437     if msg:
7438       logging.error("Instance pre-migration failed, trying to revert"
7439                     " disk status: %s", msg)
7440       self.feedback_fn("Pre-migration failed, aborting")
7441       self._AbortMigration()
7442       self._RevertDiskStatus()
7443       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7444                                (instance.name, msg))
7445
7446     self.feedback_fn("* migrating instance to %s" % target_node)
7447     result = self.rpc.call_instance_migrate(source_node, instance,
7448                                             self.nodes_ip[target_node],
7449                                             self.live)
7450     msg = result.fail_msg
7451     if msg:
7452       logging.error("Instance migration failed, trying to revert"
7453                     " disk status: %s", msg)
7454       self.feedback_fn("Migration failed, aborting")
7455       self._AbortMigration()
7456       self._RevertDiskStatus()
7457       raise errors.OpExecError("Could not migrate instance %s: %s" %
7458                                (instance.name, msg))
7459
7460     instance.primary_node = target_node
7461     # distribute new instance config to the other nodes
7462     self.cfg.Update(instance, self.feedback_fn)
7463
7464     result = self.rpc.call_finalize_migration(target_node,
7465                                               instance,
7466                                               migration_info,
7467                                               True)
7468     msg = result.fail_msg
7469     if msg:
7470       logging.error("Instance migration succeeded, but finalization failed:"
7471                     " %s", msg)
7472       raise errors.OpExecError("Could not finalize instance migration: %s" %
7473                                msg)
7474
7475     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7476       self._EnsureSecondary(source_node)
7477       self._WaitUntilSync()
7478       self._GoStandalone()
7479       self._GoReconnect(False)
7480       self._WaitUntilSync()
7481
7482     self.feedback_fn("* done")
7483
7484   def _ExecFailover(self):
7485     """Failover an instance.
7486
7487     The failover is done by shutting it down on its present node and
7488     starting it on the secondary.
7489
7490     """
7491     instance = self.instance
7492     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7493
7494     source_node = instance.primary_node
7495     target_node = self.target_node
7496
7497     if instance.admin_up:
7498       self.feedback_fn("* checking disk consistency between source and target")
7499       for dev in instance.disks:
7500         # for drbd, these are drbd over lvm
7501         if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7502           if primary_node.offline:
7503             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7504                              " target node %s" %
7505                              (primary_node.name, dev.iv_name, target_node))
7506           elif not self.ignore_consistency:
7507             raise errors.OpExecError("Disk %s is degraded on target node,"
7508                                      " aborting failover" % dev.iv_name)
7509     else:
7510       self.feedback_fn("* not checking disk consistency as instance is not"
7511                        " running")
7512
7513     self.feedback_fn("* shutting down instance on source node")
7514     logging.info("Shutting down instance %s on node %s",
7515                  instance.name, source_node)
7516
7517     result = self.rpc.call_instance_shutdown(source_node, instance,
7518                                              self.shutdown_timeout)
7519     msg = result.fail_msg
7520     if msg:
7521       if self.ignore_consistency or primary_node.offline:
7522         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7523                            " proceeding anyway; please make sure node"
7524                            " %s is down; error details: %s",
7525                            instance.name, source_node, source_node, msg)
7526       else:
7527         raise errors.OpExecError("Could not shutdown instance %s on"
7528                                  " node %s: %s" %
7529                                  (instance.name, source_node, msg))
7530
7531     self.feedback_fn("* deactivating the instance's disks on source node")
7532     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7533       raise errors.OpExecError("Can't shut down the instance's disks")
7534
7535     instance.primary_node = target_node
7536     # distribute new instance config to the other nodes
7537     self.cfg.Update(instance, self.feedback_fn)
7538
7539     # Only start the instance if it's marked as up
7540     if instance.admin_up:
7541       self.feedback_fn("* activating the instance's disks on target node %s" %
7542                        target_node)
7543       logging.info("Starting instance %s on node %s",
7544                    instance.name, target_node)
7545
7546       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7547                                            ignore_secondaries=True)
7548       if not disks_ok:
7549         _ShutdownInstanceDisks(self.lu, instance)
7550         raise errors.OpExecError("Can't activate the instance's disks")
7551
7552       self.feedback_fn("* starting the instance on the target node %s" %
7553                        target_node)
7554       result = self.rpc.call_instance_start(target_node, instance, None, None,
7555                                             False)
7556       msg = result.fail_msg
7557       if msg:
7558         _ShutdownInstanceDisks(self.lu, instance)
7559         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7560                                  (instance.name, target_node, msg))
7561
7562   def Exec(self, feedback_fn):
7563     """Perform the migration.
7564
7565     """
7566     self.feedback_fn = feedback_fn
7567     self.source_node = self.instance.primary_node
7568
7569     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7570     if self.instance.disk_template in constants.DTS_INT_MIRROR:
7571       self.target_node = self.instance.secondary_nodes[0]
7572       # Otherwise self.target_node has been populated either
7573       # directly, or through an iallocator.
7574
7575     self.all_nodes = [self.source_node, self.target_node]
7576     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7577                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
7578
7579     if self.failover:
7580       feedback_fn("Failover instance %s" % self.instance.name)
7581       self._ExecFailover()
7582     else:
7583       feedback_fn("Migrating instance %s" % self.instance.name)
7584
7585       if self.cleanup:
7586         return self._ExecCleanup()
7587       else:
7588         return self._ExecMigration()
7589
7590
7591 def _CreateBlockDev(lu, node, instance, device, force_create,
7592                     info, force_open):
7593   """Create a tree of block devices on a given node.
7594
7595   If this device type has to be created on secondaries, create it and
7596   all its children.
7597
7598   If not, just recurse to children keeping the same 'force' value.
7599
7600   @param lu: the lu on whose behalf we execute
7601   @param node: the node on which to create the device
7602   @type instance: L{objects.Instance}
7603   @param instance: the instance which owns the device
7604   @type device: L{objects.Disk}
7605   @param device: the device to create
7606   @type force_create: boolean
7607   @param force_create: whether to force creation of this device; this
7608       will be change to True whenever we find a device which has
7609       CreateOnSecondary() attribute
7610   @param info: the extra 'metadata' we should attach to the device
7611       (this will be represented as a LVM tag)
7612   @type force_open: boolean
7613   @param force_open: this parameter will be passes to the
7614       L{backend.BlockdevCreate} function where it specifies
7615       whether we run on primary or not, and it affects both
7616       the child assembly and the device own Open() execution
7617
7618   """
7619   if device.CreateOnSecondary():
7620     force_create = True
7621
7622   if device.children:
7623     for child in device.children:
7624       _CreateBlockDev(lu, node, instance, child, force_create,
7625                       info, force_open)
7626
7627   if not force_create:
7628     return
7629
7630   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7631
7632
7633 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7634   """Create a single block device on a given node.
7635
7636   This will not recurse over children of the device, so they must be
7637   created in advance.
7638
7639   @param lu: the lu on whose behalf we execute
7640   @param node: the node on which to create the device
7641   @type instance: L{objects.Instance}
7642   @param instance: the instance which owns the device
7643   @type device: L{objects.Disk}
7644   @param device: the device to create
7645   @param info: the extra 'metadata' we should attach to the device
7646       (this will be represented as a LVM tag)
7647   @type force_open: boolean
7648   @param force_open: this parameter will be passes to the
7649       L{backend.BlockdevCreate} function where it specifies
7650       whether we run on primary or not, and it affects both
7651       the child assembly and the device own Open() execution
7652
7653   """
7654   lu.cfg.SetDiskID(device, node)
7655   result = lu.rpc.call_blockdev_create(node, device, device.size,
7656                                        instance.name, force_open, info)
7657   result.Raise("Can't create block device %s on"
7658                " node %s for instance %s" % (device, node, instance.name))
7659   if device.physical_id is None:
7660     device.physical_id = result.payload
7661
7662
7663 def _GenerateUniqueNames(lu, exts):
7664   """Generate a suitable LV name.
7665
7666   This will generate a logical volume name for the given instance.
7667
7668   """
7669   results = []
7670   for val in exts:
7671     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7672     results.append("%s%s" % (new_id, val))
7673   return results
7674
7675
7676 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7677                          iv_name, p_minor, s_minor):
7678   """Generate a drbd8 device complete with its children.
7679
7680   """
7681   assert len(vgnames) == len(names) == 2
7682   port = lu.cfg.AllocatePort()
7683   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7684   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7685                           logical_id=(vgnames[0], names[0]))
7686   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7687                           logical_id=(vgnames[1], names[1]))
7688   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7689                           logical_id=(primary, secondary, port,
7690                                       p_minor, s_minor,
7691                                       shared_secret),
7692                           children=[dev_data, dev_meta],
7693                           iv_name=iv_name)
7694   return drbd_dev
7695
7696
7697 def _GenerateDiskTemplate(lu, template_name,
7698                           instance_name, primary_node,
7699                           secondary_nodes, disk_info,
7700                           file_storage_dir, file_driver,
7701                           base_index, feedback_fn):
7702   """Generate the entire disk layout for a given template type.
7703
7704   """
7705   #TODO: compute space requirements
7706
7707   vgname = lu.cfg.GetVGName()
7708   disk_count = len(disk_info)
7709   disks = []
7710   if template_name == constants.DT_DISKLESS:
7711     pass
7712   elif template_name == constants.DT_PLAIN:
7713     if len(secondary_nodes) != 0:
7714       raise errors.ProgrammerError("Wrong template configuration")
7715
7716     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7717                                       for i in range(disk_count)])
7718     for idx, disk in enumerate(disk_info):
7719       disk_index = idx + base_index
7720       vg = disk.get(constants.IDISK_VG, vgname)
7721       feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7722       disk_dev = objects.Disk(dev_type=constants.LD_LV,
7723                               size=disk[constants.IDISK_SIZE],
7724                               logical_id=(vg, names[idx]),
7725                               iv_name="disk/%d" % disk_index,
7726                               mode=disk[constants.IDISK_MODE])
7727       disks.append(disk_dev)
7728   elif template_name == constants.DT_DRBD8:
7729     if len(secondary_nodes) != 1:
7730       raise errors.ProgrammerError("Wrong template configuration")
7731     remote_node = secondary_nodes[0]
7732     minors = lu.cfg.AllocateDRBDMinor(
7733       [primary_node, remote_node] * len(disk_info), instance_name)
7734
7735     names = []
7736     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7737                                                for i in range(disk_count)]):
7738       names.append(lv_prefix + "_data")
7739       names.append(lv_prefix + "_meta")
7740     for idx, disk in enumerate(disk_info):
7741       disk_index = idx + base_index
7742       data_vg = disk.get(constants.IDISK_VG, vgname)
7743       meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7744       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7745                                       disk[constants.IDISK_SIZE],
7746                                       [data_vg, meta_vg],
7747                                       names[idx * 2:idx * 2 + 2],
7748                                       "disk/%d" % disk_index,
7749                                       minors[idx * 2], minors[idx * 2 + 1])
7750       disk_dev.mode = disk[constants.IDISK_MODE]
7751       disks.append(disk_dev)
7752   elif template_name == constants.DT_FILE:
7753     if len(secondary_nodes) != 0:
7754       raise errors.ProgrammerError("Wrong template configuration")
7755
7756     opcodes.RequireFileStorage()
7757
7758     for idx, disk in enumerate(disk_info):
7759       disk_index = idx + base_index
7760       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7761                               size=disk[constants.IDISK_SIZE],
7762                               iv_name="disk/%d" % disk_index,
7763                               logical_id=(file_driver,
7764                                           "%s/disk%d" % (file_storage_dir,
7765                                                          disk_index)),
7766                               mode=disk[constants.IDISK_MODE])
7767       disks.append(disk_dev)
7768   elif template_name == constants.DT_SHARED_FILE:
7769     if len(secondary_nodes) != 0:
7770       raise errors.ProgrammerError("Wrong template configuration")
7771
7772     opcodes.RequireSharedFileStorage()
7773
7774     for idx, disk in enumerate(disk_info):
7775       disk_index = idx + base_index
7776       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7777                               size=disk[constants.IDISK_SIZE],
7778                               iv_name="disk/%d" % disk_index,
7779                               logical_id=(file_driver,
7780                                           "%s/disk%d" % (file_storage_dir,
7781                                                          disk_index)),
7782                               mode=disk[constants.IDISK_MODE])
7783       disks.append(disk_dev)
7784   elif template_name == constants.DT_BLOCK:
7785     if len(secondary_nodes) != 0:
7786       raise errors.ProgrammerError("Wrong template configuration")
7787
7788     for idx, disk in enumerate(disk_info):
7789       disk_index = idx + base_index
7790       disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7791                               size=disk[constants.IDISK_SIZE],
7792                               logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7793                                           disk[constants.IDISK_ADOPT]),
7794                               iv_name="disk/%d" % disk_index,
7795                               mode=disk[constants.IDISK_MODE])
7796       disks.append(disk_dev)
7797
7798   else:
7799     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7800   return disks
7801
7802
7803 def _GetInstanceInfoText(instance):
7804   """Compute that text that should be added to the disk's metadata.
7805
7806   """
7807   return "originstname+%s" % instance.name
7808
7809
7810 def _CalcEta(time_taken, written, total_size):
7811   """Calculates the ETA based on size written and total size.
7812
7813   @param time_taken: The time taken so far
7814   @param written: amount written so far
7815   @param total_size: The total size of data to be written
7816   @return: The remaining time in seconds
7817
7818   """
7819   avg_time = time_taken / float(written)
7820   return (total_size - written) * avg_time
7821
7822
7823 def _WipeDisks(lu, instance):
7824   """Wipes instance disks.
7825
7826   @type lu: L{LogicalUnit}
7827   @param lu: the logical unit on whose behalf we execute
7828   @type instance: L{objects.Instance}
7829   @param instance: the instance whose disks we should create
7830   @return: the success of the wipe
7831
7832   """
7833   node = instance.primary_node
7834
7835   for device in instance.disks:
7836     lu.cfg.SetDiskID(device, node)
7837
7838   logging.info("Pause sync of instance %s disks", instance.name)
7839   result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7840
7841   for idx, success in enumerate(result.payload):
7842     if not success:
7843       logging.warn("pause-sync of instance %s for disks %d failed",
7844                    instance.name, idx)
7845
7846   try:
7847     for idx, device in enumerate(instance.disks):
7848       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7849       # MAX_WIPE_CHUNK at max
7850       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7851                             constants.MIN_WIPE_CHUNK_PERCENT)
7852       # we _must_ make this an int, otherwise rounding errors will
7853       # occur
7854       wipe_chunk_size = int(wipe_chunk_size)
7855
7856       lu.LogInfo("* Wiping disk %d", idx)
7857       logging.info("Wiping disk %d for instance %s, node %s using"
7858                    " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7859
7860       offset = 0
7861       size = device.size
7862       last_output = 0
7863       start_time = time.time()
7864
7865       while offset < size:
7866         wipe_size = min(wipe_chunk_size, size - offset)
7867         logging.debug("Wiping disk %d, offset %s, chunk %s",
7868                       idx, offset, wipe_size)
7869         result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7870         result.Raise("Could not wipe disk %d at offset %d for size %d" %
7871                      (idx, offset, wipe_size))
7872         now = time.time()
7873         offset += wipe_size
7874         if now - last_output >= 60:
7875           eta = _CalcEta(now - start_time, offset, size)
7876           lu.LogInfo(" - done: %.1f%% ETA: %s" %
7877                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
7878           last_output = now
7879   finally:
7880     logging.info("Resume sync of instance %s disks", instance.name)
7881
7882     result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7883
7884     for idx, success in enumerate(result.payload):
7885       if not success:
7886         lu.LogWarning("Resume sync of disk %d failed, please have a"
7887                       " look at the status and troubleshoot the issue", idx)
7888         logging.warn("resume-sync of instance %s for disks %d failed",
7889                      instance.name, idx)
7890
7891
7892 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7893   """Create all disks for an instance.
7894
7895   This abstracts away some work from AddInstance.
7896
7897   @type lu: L{LogicalUnit}
7898   @param lu: the logical unit on whose behalf we execute
7899   @type instance: L{objects.Instance}
7900   @param instance: the instance whose disks we should create
7901   @type to_skip: list
7902   @param to_skip: list of indices to skip
7903   @type target_node: string
7904   @param target_node: if passed, overrides the target node for creation
7905   @rtype: boolean
7906   @return: the success of the creation
7907
7908   """
7909   info = _GetInstanceInfoText(instance)
7910   if target_node is None:
7911     pnode = instance.primary_node
7912     all_nodes = instance.all_nodes
7913   else:
7914     pnode = target_node
7915     all_nodes = [pnode]
7916
7917   if instance.disk_template in constants.DTS_FILEBASED:
7918     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7919     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7920
7921     result.Raise("Failed to create directory '%s' on"
7922                  " node %s" % (file_storage_dir, pnode))
7923
7924   # Note: this needs to be kept in sync with adding of disks in
7925   # LUInstanceSetParams
7926   for idx, device in enumerate(instance.disks):
7927     if to_skip and idx in to_skip:
7928       continue
7929     logging.info("Creating volume %s for instance %s",
7930                  device.iv_name, instance.name)
7931     #HARDCODE
7932     for node in all_nodes:
7933       f_create = node == pnode
7934       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7935
7936
7937 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
7938   """Remove all disks for an instance.
7939
7940   This abstracts away some work from `AddInstance()` and
7941   `RemoveInstance()`. Note that in case some of the devices couldn't
7942   be removed, the removal will continue with the other ones (compare
7943   with `_CreateDisks()`).
7944
7945   @type lu: L{LogicalUnit}
7946   @param lu: the logical unit on whose behalf we execute
7947   @type instance: L{objects.Instance}
7948   @param instance: the instance whose disks we should remove
7949   @type target_node: string
7950   @param target_node: used to override the node on which to remove the disks
7951   @rtype: boolean
7952   @return: the success of the removal
7953
7954   """
7955   logging.info("Removing block devices for instance %s", instance.name)
7956
7957   all_result = True
7958   ports_to_release = set()
7959   for device in instance.disks:
7960     if target_node:
7961       edata = [(target_node, device)]
7962     else:
7963       edata = device.ComputeNodeTree(instance.primary_node)
7964     for node, disk in edata:
7965       lu.cfg.SetDiskID(disk, node)
7966       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7967       if msg:
7968         lu.LogWarning("Could not remove block device %s on node %s,"
7969                       " continuing anyway: %s", device.iv_name, node, msg)
7970         all_result = False
7971
7972     # if this is a DRBD disk, return its port to the pool
7973     if device.dev_type in constants.LDS_DRBD:
7974       ports_to_release.add(device.logical_id[2])
7975
7976   if all_result or ignore_failures:
7977     for port in ports_to_release:
7978       lu.cfg.AddTcpUdpPort(port)
7979
7980   if instance.disk_template == constants.DT_FILE:
7981     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7982     if target_node:
7983       tgt = target_node
7984     else:
7985       tgt = instance.primary_node
7986     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7987     if result.fail_msg:
7988       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7989                     file_storage_dir, instance.primary_node, result.fail_msg)
7990       all_result = False
7991
7992   return all_result
7993
7994
7995 def _ComputeDiskSizePerVG(disk_template, disks):
7996   """Compute disk size requirements in the volume group
7997
7998   """
7999   def _compute(disks, payload):
8000     """Universal algorithm.
8001
8002     """
8003     vgs = {}
8004     for disk in disks:
8005       vgs[disk[constants.IDISK_VG]] = \
8006         vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
8007
8008     return vgs
8009
8010   # Required free disk space as a function of disk and swap space
8011   req_size_dict = {
8012     constants.DT_DISKLESS: {},
8013     constants.DT_PLAIN: _compute(disks, 0),
8014     # 128 MB are added for drbd metadata for each disk
8015     constants.DT_DRBD8: _compute(disks, 128),
8016     constants.DT_FILE: {},
8017     constants.DT_SHARED_FILE: {},
8018   }
8019
8020   if disk_template not in req_size_dict:
8021     raise errors.ProgrammerError("Disk template '%s' size requirement"
8022                                  " is unknown" % disk_template)
8023
8024   return req_size_dict[disk_template]
8025
8026
8027 def _ComputeDiskSize(disk_template, disks):
8028   """Compute disk size requirements in the volume group
8029
8030   """
8031   # Required free disk space as a function of disk and swap space
8032   req_size_dict = {
8033     constants.DT_DISKLESS: None,
8034     constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8035     # 128 MB are added for drbd metadata for each disk
8036     constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
8037     constants.DT_FILE: None,
8038     constants.DT_SHARED_FILE: 0,
8039     constants.DT_BLOCK: 0,
8040   }
8041
8042   if disk_template not in req_size_dict:
8043     raise errors.ProgrammerError("Disk template '%s' size requirement"
8044                                  " is unknown" % disk_template)
8045
8046   return req_size_dict[disk_template]
8047
8048
8049 def _FilterVmNodes(lu, nodenames):
8050   """Filters out non-vm_capable nodes from a list.
8051
8052   @type lu: L{LogicalUnit}
8053   @param lu: the logical unit for which we check
8054   @type nodenames: list
8055   @param nodenames: the list of nodes on which we should check
8056   @rtype: list
8057   @return: the list of vm-capable nodes
8058
8059   """
8060   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8061   return [name for name in nodenames if name not in vm_nodes]
8062
8063
8064 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8065   """Hypervisor parameter validation.
8066
8067   This function abstract the hypervisor parameter validation to be
8068   used in both instance create and instance modify.
8069
8070   @type lu: L{LogicalUnit}
8071   @param lu: the logical unit for which we check
8072   @type nodenames: list
8073   @param nodenames: the list of nodes on which we should check
8074   @type hvname: string
8075   @param hvname: the name of the hypervisor we should use
8076   @type hvparams: dict
8077   @param hvparams: the parameters which we need to check
8078   @raise errors.OpPrereqError: if the parameters are not valid
8079
8080   """
8081   nodenames = _FilterVmNodes(lu, nodenames)
8082   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8083                                                   hvname,
8084                                                   hvparams)
8085   for node in nodenames:
8086     info = hvinfo[node]
8087     if info.offline:
8088       continue
8089     info.Raise("Hypervisor parameter validation failed on node %s" % node)
8090
8091
8092 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8093   """OS parameters validation.
8094
8095   @type lu: L{LogicalUnit}
8096   @param lu: the logical unit for which we check
8097   @type required: boolean
8098   @param required: whether the validation should fail if the OS is not
8099       found
8100   @type nodenames: list
8101   @param nodenames: the list of nodes on which we should check
8102   @type osname: string
8103   @param osname: the name of the hypervisor we should use
8104   @type osparams: dict
8105   @param osparams: the parameters which we need to check
8106   @raise errors.OpPrereqError: if the parameters are not valid
8107
8108   """
8109   nodenames = _FilterVmNodes(lu, nodenames)
8110   result = lu.rpc.call_os_validate(required, nodenames, osname,
8111                                    [constants.OS_VALIDATE_PARAMETERS],
8112                                    osparams)
8113   for node, nres in result.items():
8114     # we don't check for offline cases since this should be run only
8115     # against the master node and/or an instance's nodes
8116     nres.Raise("OS Parameters validation failed on node %s" % node)
8117     if not nres.payload:
8118       lu.LogInfo("OS %s not found on node %s, validation skipped",
8119                  osname, node)
8120
8121
8122 class LUInstanceCreate(LogicalUnit):
8123   """Create an instance.
8124
8125   """
8126   HPATH = "instance-add"
8127   HTYPE = constants.HTYPE_INSTANCE
8128   REQ_BGL = False
8129
8130   def CheckArguments(self):
8131     """Check arguments.
8132
8133     """
8134     # do not require name_check to ease forward/backward compatibility
8135     # for tools
8136     if self.op.no_install and self.op.start:
8137       self.LogInfo("No-installation mode selected, disabling startup")
8138       self.op.start = False
8139     # validate/normalize the instance name
8140     self.op.instance_name = \
8141       netutils.Hostname.GetNormalizedName(self.op.instance_name)
8142
8143     if self.op.ip_check and not self.op.name_check:
8144       # TODO: make the ip check more flexible and not depend on the name check
8145       raise errors.OpPrereqError("Cannot do IP address check without a name"
8146                                  " check", errors.ECODE_INVAL)
8147
8148     # check nics' parameter names
8149     for nic in self.op.nics:
8150       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8151
8152     # check disks. parameter names and consistent adopt/no-adopt strategy
8153     has_adopt = has_no_adopt = False
8154     for disk in self.op.disks:
8155       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8156       if constants.IDISK_ADOPT in disk:
8157         has_adopt = True
8158       else:
8159         has_no_adopt = True
8160     if has_adopt and has_no_adopt:
8161       raise errors.OpPrereqError("Either all disks are adopted or none is",
8162                                  errors.ECODE_INVAL)
8163     if has_adopt:
8164       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8165         raise errors.OpPrereqError("Disk adoption is not supported for the"
8166                                    " '%s' disk template" %
8167                                    self.op.disk_template,
8168                                    errors.ECODE_INVAL)
8169       if self.op.iallocator is not None:
8170         raise errors.OpPrereqError("Disk adoption not allowed with an"
8171                                    " iallocator script", errors.ECODE_INVAL)
8172       if self.op.mode == constants.INSTANCE_IMPORT:
8173         raise errors.OpPrereqError("Disk adoption not allowed for"
8174                                    " instance import", errors.ECODE_INVAL)
8175     else:
8176       if self.op.disk_template in constants.DTS_MUST_ADOPT:
8177         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8178                                    " but no 'adopt' parameter given" %
8179                                    self.op.disk_template,
8180                                    errors.ECODE_INVAL)
8181
8182     self.adopt_disks = has_adopt
8183
8184     # instance name verification
8185     if self.op.name_check:
8186       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8187       self.op.instance_name = self.hostname1.name
8188       # used in CheckPrereq for ip ping check
8189       self.check_ip = self.hostname1.ip
8190     else:
8191       self.check_ip = None
8192
8193     # file storage checks
8194     if (self.op.file_driver and
8195         not self.op.file_driver in constants.FILE_DRIVER):
8196       raise errors.OpPrereqError("Invalid file driver name '%s'" %
8197                                  self.op.file_driver, errors.ECODE_INVAL)
8198
8199     if self.op.disk_template == constants.DT_FILE:
8200       opcodes.RequireFileStorage()
8201     elif self.op.disk_template == constants.DT_SHARED_FILE:
8202       opcodes.RequireSharedFileStorage()
8203
8204     ### Node/iallocator related checks
8205     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8206
8207     if self.op.pnode is not None:
8208       if self.op.disk_template in constants.DTS_INT_MIRROR:
8209         if self.op.snode is None:
8210           raise errors.OpPrereqError("The networked disk templates need"
8211                                      " a mirror node", errors.ECODE_INVAL)
8212       elif self.op.snode:
8213         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8214                         " template")
8215         self.op.snode = None
8216
8217     self._cds = _GetClusterDomainSecret()
8218
8219     if self.op.mode == constants.INSTANCE_IMPORT:
8220       # On import force_variant must be True, because if we forced it at
8221       # initial install, our only chance when importing it back is that it
8222       # works again!
8223       self.op.force_variant = True
8224
8225       if self.op.no_install:
8226         self.LogInfo("No-installation mode has no effect during import")
8227
8228     elif self.op.mode == constants.INSTANCE_CREATE:
8229       if self.op.os_type is None:
8230         raise errors.OpPrereqError("No guest OS specified",
8231                                    errors.ECODE_INVAL)
8232       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8233         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8234                                    " installation" % self.op.os_type,
8235                                    errors.ECODE_STATE)
8236       if self.op.disk_template is None:
8237         raise errors.OpPrereqError("No disk template specified",
8238                                    errors.ECODE_INVAL)
8239
8240     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8241       # Check handshake to ensure both clusters have the same domain secret
8242       src_handshake = self.op.source_handshake
8243       if not src_handshake:
8244         raise errors.OpPrereqError("Missing source handshake",
8245                                    errors.ECODE_INVAL)
8246
8247       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8248                                                            src_handshake)
8249       if errmsg:
8250         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8251                                    errors.ECODE_INVAL)
8252
8253       # Load and check source CA
8254       self.source_x509_ca_pem = self.op.source_x509_ca
8255       if not self.source_x509_ca_pem:
8256         raise errors.OpPrereqError("Missing source X509 CA",
8257                                    errors.ECODE_INVAL)
8258
8259       try:
8260         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8261                                                     self._cds)
8262       except OpenSSL.crypto.Error, err:
8263         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8264                                    (err, ), errors.ECODE_INVAL)
8265
8266       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8267       if errcode is not None:
8268         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8269                                    errors.ECODE_INVAL)
8270
8271       self.source_x509_ca = cert
8272
8273       src_instance_name = self.op.source_instance_name
8274       if not src_instance_name:
8275         raise errors.OpPrereqError("Missing source instance name",
8276                                    errors.ECODE_INVAL)
8277
8278       self.source_instance_name = \
8279           netutils.GetHostname(name=src_instance_name).name
8280
8281     else:
8282       raise errors.OpPrereqError("Invalid instance creation mode %r" %
8283                                  self.op.mode, errors.ECODE_INVAL)
8284
8285   def ExpandNames(self):
8286     """ExpandNames for CreateInstance.
8287
8288     Figure out the right locks for instance creation.
8289
8290     """
8291     self.needed_locks = {}
8292
8293     instance_name = self.op.instance_name
8294     # this is just a preventive check, but someone might still add this
8295     # instance in the meantime, and creation will fail at lock-add time
8296     if instance_name in self.cfg.GetInstanceList():
8297       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8298                                  instance_name, errors.ECODE_EXISTS)
8299
8300     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8301
8302     if self.op.iallocator:
8303       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8304     else:
8305       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8306       nodelist = [self.op.pnode]
8307       if self.op.snode is not None:
8308         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8309         nodelist.append(self.op.snode)
8310       self.needed_locks[locking.LEVEL_NODE] = nodelist
8311
8312     # in case of import lock the source node too
8313     if self.op.mode == constants.INSTANCE_IMPORT:
8314       src_node = self.op.src_node
8315       src_path = self.op.src_path
8316
8317       if src_path is None:
8318         self.op.src_path = src_path = self.op.instance_name
8319
8320       if src_node is None:
8321         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8322         self.op.src_node = None
8323         if os.path.isabs(src_path):
8324           raise errors.OpPrereqError("Importing an instance from a path"
8325                                      " requires a source node option",
8326                                      errors.ECODE_INVAL)
8327       else:
8328         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8329         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8330           self.needed_locks[locking.LEVEL_NODE].append(src_node)
8331         if not os.path.isabs(src_path):
8332           self.op.src_path = src_path = \
8333             utils.PathJoin(constants.EXPORT_DIR, src_path)
8334
8335   def _RunAllocator(self):
8336     """Run the allocator based on input opcode.
8337
8338     """
8339     nics = [n.ToDict() for n in self.nics]
8340     ial = IAllocator(self.cfg, self.rpc,
8341                      mode=constants.IALLOCATOR_MODE_ALLOC,
8342                      name=self.op.instance_name,
8343                      disk_template=self.op.disk_template,
8344                      tags=self.op.tags,
8345                      os=self.op.os_type,
8346                      vcpus=self.be_full[constants.BE_VCPUS],
8347                      memory=self.be_full[constants.BE_MEMORY],
8348                      disks=self.disks,
8349                      nics=nics,
8350                      hypervisor=self.op.hypervisor,
8351                      )
8352
8353     ial.Run(self.op.iallocator)
8354
8355     if not ial.success:
8356       raise errors.OpPrereqError("Can't compute nodes using"
8357                                  " iallocator '%s': %s" %
8358                                  (self.op.iallocator, ial.info),
8359                                  errors.ECODE_NORES)
8360     if len(ial.result) != ial.required_nodes:
8361       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8362                                  " of nodes (%s), required %s" %
8363                                  (self.op.iallocator, len(ial.result),
8364                                   ial.required_nodes), errors.ECODE_FAULT)
8365     self.op.pnode = ial.result[0]
8366     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8367                  self.op.instance_name, self.op.iallocator,
8368                  utils.CommaJoin(ial.result))
8369     if ial.required_nodes == 2:
8370       self.op.snode = ial.result[1]
8371
8372   def BuildHooksEnv(self):
8373     """Build hooks env.
8374
8375     This runs on master, primary and secondary nodes of the instance.
8376
8377     """
8378     env = {
8379       "ADD_MODE": self.op.mode,
8380       }
8381     if self.op.mode == constants.INSTANCE_IMPORT:
8382       env["SRC_NODE"] = self.op.src_node
8383       env["SRC_PATH"] = self.op.src_path
8384       env["SRC_IMAGES"] = self.src_images
8385
8386     env.update(_BuildInstanceHookEnv(
8387       name=self.op.instance_name,
8388       primary_node=self.op.pnode,
8389       secondary_nodes=self.secondaries,
8390       status=self.op.start,
8391       os_type=self.op.os_type,
8392       memory=self.be_full[constants.BE_MEMORY],
8393       vcpus=self.be_full[constants.BE_VCPUS],
8394       nics=_NICListToTuple(self, self.nics),
8395       disk_template=self.op.disk_template,
8396       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8397              for d in self.disks],
8398       bep=self.be_full,
8399       hvp=self.hv_full,
8400       hypervisor_name=self.op.hypervisor,
8401       tags=self.op.tags,
8402     ))
8403
8404     return env
8405
8406   def BuildHooksNodes(self):
8407     """Build hooks nodes.
8408
8409     """
8410     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8411     return nl, nl
8412
8413   def _ReadExportInfo(self):
8414     """Reads the export information from disk.
8415
8416     It will override the opcode source node and path with the actual
8417     information, if these two were not specified before.
8418
8419     @return: the export information
8420
8421     """
8422     assert self.op.mode == constants.INSTANCE_IMPORT
8423
8424     src_node = self.op.src_node
8425     src_path = self.op.src_path
8426
8427     if src_node is None:
8428       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8429       exp_list = self.rpc.call_export_list(locked_nodes)
8430       found = False
8431       for node in exp_list:
8432         if exp_list[node].fail_msg:
8433           continue
8434         if src_path in exp_list[node].payload:
8435           found = True
8436           self.op.src_node = src_node = node
8437           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8438                                                        src_path)
8439           break
8440       if not found:
8441         raise errors.OpPrereqError("No export found for relative path %s" %
8442                                     src_path, errors.ECODE_INVAL)
8443
8444     _CheckNodeOnline(self, src_node)
8445     result = self.rpc.call_export_info(src_node, src_path)
8446     result.Raise("No export or invalid export found in dir %s" % src_path)
8447
8448     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8449     if not export_info.has_section(constants.INISECT_EXP):
8450       raise errors.ProgrammerError("Corrupted export config",
8451                                    errors.ECODE_ENVIRON)
8452
8453     ei_version = export_info.get(constants.INISECT_EXP, "version")
8454     if (int(ei_version) != constants.EXPORT_VERSION):
8455       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8456                                  (ei_version, constants.EXPORT_VERSION),
8457                                  errors.ECODE_ENVIRON)
8458     return export_info
8459
8460   def _ReadExportParams(self, einfo):
8461     """Use export parameters as defaults.
8462
8463     In case the opcode doesn't specify (as in override) some instance
8464     parameters, then try to use them from the export information, if
8465     that declares them.
8466
8467     """
8468     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8469
8470     if self.op.disk_template is None:
8471       if einfo.has_option(constants.INISECT_INS, "disk_template"):
8472         self.op.disk_template = einfo.get(constants.INISECT_INS,
8473                                           "disk_template")
8474       else:
8475         raise errors.OpPrereqError("No disk template specified and the export"
8476                                    " is missing the disk_template information",
8477                                    errors.ECODE_INVAL)
8478
8479     if not self.op.disks:
8480       if einfo.has_option(constants.INISECT_INS, "disk_count"):
8481         disks = []
8482         # TODO: import the disk iv_name too
8483         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8484           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8485           disks.append({constants.IDISK_SIZE: disk_sz})
8486         self.op.disks = disks
8487       else:
8488         raise errors.OpPrereqError("No disk info specified and the export"
8489                                    " is missing the disk information",
8490                                    errors.ECODE_INVAL)
8491
8492     if (not self.op.nics and
8493         einfo.has_option(constants.INISECT_INS, "nic_count")):
8494       nics = []
8495       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8496         ndict = {}
8497         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8498           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8499           ndict[name] = v
8500         nics.append(ndict)
8501       self.op.nics = nics
8502
8503     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8504       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8505
8506     if (self.op.hypervisor is None and
8507         einfo.has_option(constants.INISECT_INS, "hypervisor")):
8508       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8509
8510     if einfo.has_section(constants.INISECT_HYP):
8511       # use the export parameters but do not override the ones
8512       # specified by the user
8513       for name, value in einfo.items(constants.INISECT_HYP):
8514         if name not in self.op.hvparams:
8515           self.op.hvparams[name] = value
8516
8517     if einfo.has_section(constants.INISECT_BEP):
8518       # use the parameters, without overriding
8519       for name, value in einfo.items(constants.INISECT_BEP):
8520         if name not in self.op.beparams:
8521           self.op.beparams[name] = value
8522     else:
8523       # try to read the parameters old style, from the main section
8524       for name in constants.BES_PARAMETERS:
8525         if (name not in self.op.beparams and
8526             einfo.has_option(constants.INISECT_INS, name)):
8527           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8528
8529     if einfo.has_section(constants.INISECT_OSP):
8530       # use the parameters, without overriding
8531       for name, value in einfo.items(constants.INISECT_OSP):
8532         if name not in self.op.osparams:
8533           self.op.osparams[name] = value
8534
8535   def _RevertToDefaults(self, cluster):
8536     """Revert the instance parameters to the default values.
8537
8538     """
8539     # hvparams
8540     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8541     for name in self.op.hvparams.keys():
8542       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8543         del self.op.hvparams[name]
8544     # beparams
8545     be_defs = cluster.SimpleFillBE({})
8546     for name in self.op.beparams.keys():
8547       if name in be_defs and be_defs[name] == self.op.beparams[name]:
8548         del self.op.beparams[name]
8549     # nic params
8550     nic_defs = cluster.SimpleFillNIC({})
8551     for nic in self.op.nics:
8552       for name in constants.NICS_PARAMETERS:
8553         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8554           del nic[name]
8555     # osparams
8556     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8557     for name in self.op.osparams.keys():
8558       if name in os_defs and os_defs[name] == self.op.osparams[name]:
8559         del self.op.osparams[name]
8560
8561   def _CalculateFileStorageDir(self):
8562     """Calculate final instance file storage dir.
8563
8564     """
8565     # file storage dir calculation/check
8566     self.instance_file_storage_dir = None
8567     if self.op.disk_template in constants.DTS_FILEBASED:
8568       # build the full file storage dir path
8569       joinargs = []
8570
8571       if self.op.disk_template == constants.DT_SHARED_FILE:
8572         get_fsd_fn = self.cfg.GetSharedFileStorageDir
8573       else:
8574         get_fsd_fn = self.cfg.GetFileStorageDir
8575
8576       cfg_storagedir = get_fsd_fn()
8577       if not cfg_storagedir:
8578         raise errors.OpPrereqError("Cluster file storage dir not defined")
8579       joinargs.append(cfg_storagedir)
8580
8581       if self.op.file_storage_dir is not None:
8582         joinargs.append(self.op.file_storage_dir)
8583
8584       joinargs.append(self.op.instance_name)
8585
8586       # pylint: disable=W0142
8587       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8588
8589   def CheckPrereq(self):
8590     """Check prerequisites.
8591
8592     """
8593     self._CalculateFileStorageDir()
8594
8595     if self.op.mode == constants.INSTANCE_IMPORT:
8596       export_info = self._ReadExportInfo()
8597       self._ReadExportParams(export_info)
8598
8599     if (not self.cfg.GetVGName() and
8600         self.op.disk_template not in constants.DTS_NOT_LVM):
8601       raise errors.OpPrereqError("Cluster does not support lvm-based"
8602                                  " instances", errors.ECODE_STATE)
8603
8604     if self.op.hypervisor is None:
8605       self.op.hypervisor = self.cfg.GetHypervisorType()
8606
8607     cluster = self.cfg.GetClusterInfo()
8608     enabled_hvs = cluster.enabled_hypervisors
8609     if self.op.hypervisor not in enabled_hvs:
8610       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8611                                  " cluster (%s)" % (self.op.hypervisor,
8612                                   ",".join(enabled_hvs)),
8613                                  errors.ECODE_STATE)
8614
8615     # Check tag validity
8616     for tag in self.op.tags:
8617       objects.TaggableObject.ValidateTag(tag)
8618
8619     # check hypervisor parameter syntax (locally)
8620     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8621     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8622                                       self.op.hvparams)
8623     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8624     hv_type.CheckParameterSyntax(filled_hvp)
8625     self.hv_full = filled_hvp
8626     # check that we don't specify global parameters on an instance
8627     _CheckGlobalHvParams(self.op.hvparams)
8628
8629     # fill and remember the beparams dict
8630     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8631     self.be_full = cluster.SimpleFillBE(self.op.beparams)
8632
8633     # build os parameters
8634     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8635
8636     # now that hvp/bep are in final format, let's reset to defaults,
8637     # if told to do so
8638     if self.op.identify_defaults:
8639       self._RevertToDefaults(cluster)
8640
8641     # NIC buildup
8642     self.nics = []
8643     for idx, nic in enumerate(self.op.nics):
8644       nic_mode_req = nic.get(constants.INIC_MODE, None)
8645       nic_mode = nic_mode_req
8646       if nic_mode is None:
8647         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8648
8649       # in routed mode, for the first nic, the default ip is 'auto'
8650       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8651         default_ip_mode = constants.VALUE_AUTO
8652       else:
8653         default_ip_mode = constants.VALUE_NONE
8654
8655       # ip validity checks
8656       ip = nic.get(constants.INIC_IP, default_ip_mode)
8657       if ip is None or ip.lower() == constants.VALUE_NONE:
8658         nic_ip = None
8659       elif ip.lower() == constants.VALUE_AUTO:
8660         if not self.op.name_check:
8661           raise errors.OpPrereqError("IP address set to auto but name checks"
8662                                      " have been skipped",
8663                                      errors.ECODE_INVAL)
8664         nic_ip = self.hostname1.ip
8665       else:
8666         if not netutils.IPAddress.IsValid(ip):
8667           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8668                                      errors.ECODE_INVAL)
8669         nic_ip = ip
8670
8671       # TODO: check the ip address for uniqueness
8672       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8673         raise errors.OpPrereqError("Routed nic mode requires an ip address",
8674                                    errors.ECODE_INVAL)
8675
8676       # MAC address verification
8677       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8678       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8679         mac = utils.NormalizeAndValidateMac(mac)
8680
8681         try:
8682           self.cfg.ReserveMAC(mac, self.proc.GetECId())
8683         except errors.ReservationError:
8684           raise errors.OpPrereqError("MAC address %s already in use"
8685                                      " in cluster" % mac,
8686                                      errors.ECODE_NOTUNIQUE)
8687
8688       #  Build nic parameters
8689       link = nic.get(constants.INIC_LINK, None)
8690       nicparams = {}
8691       if nic_mode_req:
8692         nicparams[constants.NIC_MODE] = nic_mode_req
8693       if link:
8694         nicparams[constants.NIC_LINK] = link
8695
8696       check_params = cluster.SimpleFillNIC(nicparams)
8697       objects.NIC.CheckParameterSyntax(check_params)
8698       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8699
8700     # disk checks/pre-build
8701     default_vg = self.cfg.GetVGName()
8702     self.disks = []
8703     for disk in self.op.disks:
8704       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8705       if mode not in constants.DISK_ACCESS_SET:
8706         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8707                                    mode, errors.ECODE_INVAL)
8708       size = disk.get(constants.IDISK_SIZE, None)
8709       if size is None:
8710         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8711       try:
8712         size = int(size)
8713       except (TypeError, ValueError):
8714         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8715                                    errors.ECODE_INVAL)
8716
8717       data_vg = disk.get(constants.IDISK_VG, default_vg)
8718       new_disk = {
8719         constants.IDISK_SIZE: size,
8720         constants.IDISK_MODE: mode,
8721         constants.IDISK_VG: data_vg,
8722         constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8723         }
8724       if constants.IDISK_ADOPT in disk:
8725         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8726       self.disks.append(new_disk)
8727
8728     if self.op.mode == constants.INSTANCE_IMPORT:
8729
8730       # Check that the new instance doesn't have less disks than the export
8731       instance_disks = len(self.disks)
8732       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8733       if instance_disks < export_disks:
8734         raise errors.OpPrereqError("Not enough disks to import."
8735                                    " (instance: %d, export: %d)" %
8736                                    (instance_disks, export_disks),
8737                                    errors.ECODE_INVAL)
8738
8739       disk_images = []
8740       for idx in range(export_disks):
8741         option = "disk%d_dump" % idx
8742         if export_info.has_option(constants.INISECT_INS, option):
8743           # FIXME: are the old os-es, disk sizes, etc. useful?
8744           export_name = export_info.get(constants.INISECT_INS, option)
8745           image = utils.PathJoin(self.op.src_path, export_name)
8746           disk_images.append(image)
8747         else:
8748           disk_images.append(False)
8749
8750       self.src_images = disk_images
8751
8752       old_name = export_info.get(constants.INISECT_INS, "name")
8753       try:
8754         exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8755       except (TypeError, ValueError), err:
8756         raise errors.OpPrereqError("Invalid export file, nic_count is not"
8757                                    " an integer: %s" % str(err),
8758                                    errors.ECODE_STATE)
8759       if self.op.instance_name == old_name:
8760         for idx, nic in enumerate(self.nics):
8761           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8762             nic_mac_ini = "nic%d_mac" % idx
8763             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8764
8765     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8766
8767     # ip ping checks (we use the same ip that was resolved in ExpandNames)
8768     if self.op.ip_check:
8769       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8770         raise errors.OpPrereqError("IP %s of instance %s already in use" %
8771                                    (self.check_ip, self.op.instance_name),
8772                                    errors.ECODE_NOTUNIQUE)
8773
8774     #### mac address generation
8775     # By generating here the mac address both the allocator and the hooks get
8776     # the real final mac address rather than the 'auto' or 'generate' value.
8777     # There is a race condition between the generation and the instance object
8778     # creation, which means that we know the mac is valid now, but we're not
8779     # sure it will be when we actually add the instance. If things go bad
8780     # adding the instance will abort because of a duplicate mac, and the
8781     # creation job will fail.
8782     for nic in self.nics:
8783       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8784         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8785
8786     #### allocator run
8787
8788     if self.op.iallocator is not None:
8789       self._RunAllocator()
8790
8791     #### node related checks
8792
8793     # check primary node
8794     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8795     assert self.pnode is not None, \
8796       "Cannot retrieve locked node %s" % self.op.pnode
8797     if pnode.offline:
8798       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8799                                  pnode.name, errors.ECODE_STATE)
8800     if pnode.drained:
8801       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8802                                  pnode.name, errors.ECODE_STATE)
8803     if not pnode.vm_capable:
8804       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8805                                  " '%s'" % pnode.name, errors.ECODE_STATE)
8806
8807     self.secondaries = []
8808
8809     # mirror node verification
8810     if self.op.disk_template in constants.DTS_INT_MIRROR:
8811       if self.op.snode == pnode.name:
8812         raise errors.OpPrereqError("The secondary node cannot be the"
8813                                    " primary node", errors.ECODE_INVAL)
8814       _CheckNodeOnline(self, self.op.snode)
8815       _CheckNodeNotDrained(self, self.op.snode)
8816       _CheckNodeVmCapable(self, self.op.snode)
8817       self.secondaries.append(self.op.snode)
8818
8819     nodenames = [pnode.name] + self.secondaries
8820
8821     if not self.adopt_disks:
8822       # Check lv size requirements, if not adopting
8823       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8824       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8825
8826     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8827       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8828                                 disk[constants.IDISK_ADOPT])
8829                      for disk in self.disks])
8830       if len(all_lvs) != len(self.disks):
8831         raise errors.OpPrereqError("Duplicate volume names given for adoption",
8832                                    errors.ECODE_INVAL)
8833       for lv_name in all_lvs:
8834         try:
8835           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8836           # to ReserveLV uses the same syntax
8837           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8838         except errors.ReservationError:
8839           raise errors.OpPrereqError("LV named %s used by another instance" %
8840                                      lv_name, errors.ECODE_NOTUNIQUE)
8841
8842       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8843       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8844
8845       node_lvs = self.rpc.call_lv_list([pnode.name],
8846                                        vg_names.payload.keys())[pnode.name]
8847       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8848       node_lvs = node_lvs.payload
8849
8850       delta = all_lvs.difference(node_lvs.keys())
8851       if delta:
8852         raise errors.OpPrereqError("Missing logical volume(s): %s" %
8853                                    utils.CommaJoin(delta),
8854                                    errors.ECODE_INVAL)
8855       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8856       if online_lvs:
8857         raise errors.OpPrereqError("Online logical volumes found, cannot"
8858                                    " adopt: %s" % utils.CommaJoin(online_lvs),
8859                                    errors.ECODE_STATE)
8860       # update the size of disk based on what is found
8861       for dsk in self.disks:
8862         dsk[constants.IDISK_SIZE] = \
8863           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8864                                         dsk[constants.IDISK_ADOPT])][0]))
8865
8866     elif self.op.disk_template == constants.DT_BLOCK:
8867       # Normalize and de-duplicate device paths
8868       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8869                        for disk in self.disks])
8870       if len(all_disks) != len(self.disks):
8871         raise errors.OpPrereqError("Duplicate disk names given for adoption",
8872                                    errors.ECODE_INVAL)
8873       baddisks = [d for d in all_disks
8874                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8875       if baddisks:
8876         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8877                                    " cannot be adopted" %
8878                                    (", ".join(baddisks),
8879                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
8880                                    errors.ECODE_INVAL)
8881
8882       node_disks = self.rpc.call_bdev_sizes([pnode.name],
8883                                             list(all_disks))[pnode.name]
8884       node_disks.Raise("Cannot get block device information from node %s" %
8885                        pnode.name)
8886       node_disks = node_disks.payload
8887       delta = all_disks.difference(node_disks.keys())
8888       if delta:
8889         raise errors.OpPrereqError("Missing block device(s): %s" %
8890                                    utils.CommaJoin(delta),
8891                                    errors.ECODE_INVAL)
8892       for dsk in self.disks:
8893         dsk[constants.IDISK_SIZE] = \
8894           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8895
8896     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8897
8898     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8899     # check OS parameters (remotely)
8900     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8901
8902     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8903
8904     # memory check on primary node
8905     if self.op.start:
8906       _CheckNodeFreeMemory(self, self.pnode.name,
8907                            "creating instance %s" % self.op.instance_name,
8908                            self.be_full[constants.BE_MEMORY],
8909                            self.op.hypervisor)
8910
8911     self.dry_run_result = list(nodenames)
8912
8913   def Exec(self, feedback_fn):
8914     """Create and add the instance to the cluster.
8915
8916     """
8917     instance = self.op.instance_name
8918     pnode_name = self.pnode.name
8919
8920     ht_kind = self.op.hypervisor
8921     if ht_kind in constants.HTS_REQ_PORT:
8922       network_port = self.cfg.AllocatePort()
8923     else:
8924       network_port = None
8925
8926     disks = _GenerateDiskTemplate(self,
8927                                   self.op.disk_template,
8928                                   instance, pnode_name,
8929                                   self.secondaries,
8930                                   self.disks,
8931                                   self.instance_file_storage_dir,
8932                                   self.op.file_driver,
8933                                   0,
8934                                   feedback_fn)
8935
8936     iobj = objects.Instance(name=instance, os=self.op.os_type,
8937                             primary_node=pnode_name,
8938                             nics=self.nics, disks=disks,
8939                             disk_template=self.op.disk_template,
8940                             admin_up=False,
8941                             network_port=network_port,
8942                             beparams=self.op.beparams,
8943                             hvparams=self.op.hvparams,
8944                             hypervisor=self.op.hypervisor,
8945                             osparams=self.op.osparams,
8946                             )
8947
8948     if self.op.tags:
8949       for tag in self.op.tags:
8950         iobj.AddTag(tag)
8951
8952     if self.adopt_disks:
8953       if self.op.disk_template == constants.DT_PLAIN:
8954         # rename LVs to the newly-generated names; we need to construct
8955         # 'fake' LV disks with the old data, plus the new unique_id
8956         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8957         rename_to = []
8958         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8959           rename_to.append(t_dsk.logical_id)
8960           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8961           self.cfg.SetDiskID(t_dsk, pnode_name)
8962         result = self.rpc.call_blockdev_rename(pnode_name,
8963                                                zip(tmp_disks, rename_to))
8964         result.Raise("Failed to rename adoped LVs")
8965     else:
8966       feedback_fn("* creating instance disks...")
8967       try:
8968         _CreateDisks(self, iobj)
8969       except errors.OpExecError:
8970         self.LogWarning("Device creation failed, reverting...")
8971         try:
8972           _RemoveDisks(self, iobj)
8973         finally:
8974           self.cfg.ReleaseDRBDMinors(instance)
8975           raise
8976
8977     feedback_fn("adding instance %s to cluster config" % instance)
8978
8979     self.cfg.AddInstance(iobj, self.proc.GetECId())
8980
8981     # Declare that we don't want to remove the instance lock anymore, as we've
8982     # added the instance to the config
8983     del self.remove_locks[locking.LEVEL_INSTANCE]
8984
8985     if self.op.mode == constants.INSTANCE_IMPORT:
8986       # Release unused nodes
8987       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8988     else:
8989       # Release all nodes
8990       _ReleaseLocks(self, locking.LEVEL_NODE)
8991
8992     disk_abort = False
8993     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8994       feedback_fn("* wiping instance disks...")
8995       try:
8996         _WipeDisks(self, iobj)
8997       except errors.OpExecError, err:
8998         logging.exception("Wiping disks failed")
8999         self.LogWarning("Wiping instance disks failed (%s)", err)
9000         disk_abort = True
9001
9002     if disk_abort:
9003       # Something is already wrong with the disks, don't do anything else
9004       pass
9005     elif self.op.wait_for_sync:
9006       disk_abort = not _WaitForSync(self, iobj)
9007     elif iobj.disk_template in constants.DTS_INT_MIRROR:
9008       # make sure the disks are not degraded (still sync-ing is ok)
9009       feedback_fn("* checking mirrors status")
9010       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9011     else:
9012       disk_abort = False
9013
9014     if disk_abort:
9015       _RemoveDisks(self, iobj)
9016       self.cfg.RemoveInstance(iobj.name)
9017       # Make sure the instance lock gets removed
9018       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9019       raise errors.OpExecError("There are some degraded disks for"
9020                                " this instance")
9021
9022     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9023       if self.op.mode == constants.INSTANCE_CREATE:
9024         if not self.op.no_install:
9025           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9026                         not self.op.wait_for_sync)
9027           if pause_sync:
9028             feedback_fn("* pausing disk sync to install instance OS")
9029             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9030                                                               iobj.disks, True)
9031             for idx, success in enumerate(result.payload):
9032               if not success:
9033                 logging.warn("pause-sync of instance %s for disk %d failed",
9034                              instance, idx)
9035
9036           feedback_fn("* running the instance OS create scripts...")
9037           # FIXME: pass debug option from opcode to backend
9038           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
9039                                                  self.op.debug_level)
9040           if pause_sync:
9041             feedback_fn("* resuming disk sync")
9042             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9043                                                               iobj.disks, False)
9044             for idx, success in enumerate(result.payload):
9045               if not success:
9046                 logging.warn("resume-sync of instance %s for disk %d failed",
9047                              instance, idx)
9048
9049           result.Raise("Could not add os for instance %s"
9050                        " on node %s" % (instance, pnode_name))
9051
9052       elif self.op.mode == constants.INSTANCE_IMPORT:
9053         feedback_fn("* running the instance OS import scripts...")
9054
9055         transfers = []
9056
9057         for idx, image in enumerate(self.src_images):
9058           if not image:
9059             continue
9060
9061           # FIXME: pass debug option from opcode to backend
9062           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9063                                              constants.IEIO_FILE, (image, ),
9064                                              constants.IEIO_SCRIPT,
9065                                              (iobj.disks[idx], idx),
9066                                              None)
9067           transfers.append(dt)
9068
9069         import_result = \
9070           masterd.instance.TransferInstanceData(self, feedback_fn,
9071                                                 self.op.src_node, pnode_name,
9072                                                 self.pnode.secondary_ip,
9073                                                 iobj, transfers)
9074         if not compat.all(import_result):
9075           self.LogWarning("Some disks for instance %s on node %s were not"
9076                           " imported successfully" % (instance, pnode_name))
9077
9078       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9079         feedback_fn("* preparing remote import...")
9080         # The source cluster will stop the instance before attempting to make a
9081         # connection. In some cases stopping an instance can take a long time,
9082         # hence the shutdown timeout is added to the connection timeout.
9083         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9084                            self.op.source_shutdown_timeout)
9085         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9086
9087         assert iobj.primary_node == self.pnode.name
9088         disk_results = \
9089           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9090                                         self.source_x509_ca,
9091                                         self._cds, timeouts)
9092         if not compat.all(disk_results):
9093           # TODO: Should the instance still be started, even if some disks
9094           # failed to import (valid for local imports, too)?
9095           self.LogWarning("Some disks for instance %s on node %s were not"
9096                           " imported successfully" % (instance, pnode_name))
9097
9098         # Run rename script on newly imported instance
9099         assert iobj.name == instance
9100         feedback_fn("Running rename script for %s" % instance)
9101         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9102                                                    self.source_instance_name,
9103                                                    self.op.debug_level)
9104         if result.fail_msg:
9105           self.LogWarning("Failed to run rename script for %s on node"
9106                           " %s: %s" % (instance, pnode_name, result.fail_msg))
9107
9108       else:
9109         # also checked in the prereq part
9110         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9111                                      % self.op.mode)
9112
9113     if self.op.start:
9114       iobj.admin_up = True
9115       self.cfg.Update(iobj, feedback_fn)
9116       logging.info("Starting instance %s on node %s", instance, pnode_name)
9117       feedback_fn("* starting instance...")
9118       result = self.rpc.call_instance_start(pnode_name, iobj,
9119                                             None, None, False)
9120       result.Raise("Could not start instance")
9121
9122     return list(iobj.all_nodes)
9123
9124
9125 class LUInstanceConsole(NoHooksLU):
9126   """Connect to an instance's console.
9127
9128   This is somewhat special in that it returns the command line that
9129   you need to run on the master node in order to connect to the
9130   console.
9131
9132   """
9133   REQ_BGL = False
9134
9135   def ExpandNames(self):
9136     self._ExpandAndLockInstance()
9137
9138   def CheckPrereq(self):
9139     """Check prerequisites.
9140
9141     This checks that the instance is in the cluster.
9142
9143     """
9144     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9145     assert self.instance is not None, \
9146       "Cannot retrieve locked instance %s" % self.op.instance_name
9147     _CheckNodeOnline(self, self.instance.primary_node)
9148
9149   def Exec(self, feedback_fn):
9150     """Connect to the console of an instance
9151
9152     """
9153     instance = self.instance
9154     node = instance.primary_node
9155
9156     node_insts = self.rpc.call_instance_list([node],
9157                                              [instance.hypervisor])[node]
9158     node_insts.Raise("Can't get node information from %s" % node)
9159
9160     if instance.name not in node_insts.payload:
9161       if instance.admin_up:
9162         state = constants.INSTST_ERRORDOWN
9163       else:
9164         state = constants.INSTST_ADMINDOWN
9165       raise errors.OpExecError("Instance %s is not running (state %s)" %
9166                                (instance.name, state))
9167
9168     logging.debug("Connecting to console of %s on %s", instance.name, node)
9169
9170     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9171
9172
9173 def _GetInstanceConsole(cluster, instance):
9174   """Returns console information for an instance.
9175
9176   @type cluster: L{objects.Cluster}
9177   @type instance: L{objects.Instance}
9178   @rtype: dict
9179
9180   """
9181   hyper = hypervisor.GetHypervisor(instance.hypervisor)
9182   # beparams and hvparams are passed separately, to avoid editing the
9183   # instance and then saving the defaults in the instance itself.
9184   hvparams = cluster.FillHV(instance)
9185   beparams = cluster.FillBE(instance)
9186   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9187
9188   assert console.instance == instance.name
9189   assert console.Validate()
9190
9191   return console.ToDict()
9192
9193
9194 class LUInstanceReplaceDisks(LogicalUnit):
9195   """Replace the disks of an instance.
9196
9197   """
9198   HPATH = "mirrors-replace"
9199   HTYPE = constants.HTYPE_INSTANCE
9200   REQ_BGL = False
9201
9202   def CheckArguments(self):
9203     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9204                                   self.op.iallocator)
9205
9206   def ExpandNames(self):
9207     self._ExpandAndLockInstance()
9208
9209     assert locking.LEVEL_NODE not in self.needed_locks
9210     assert locking.LEVEL_NODEGROUP not in self.needed_locks
9211
9212     assert self.op.iallocator is None or self.op.remote_node is None, \
9213       "Conflicting options"
9214
9215     if self.op.remote_node is not None:
9216       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9217
9218       # Warning: do not remove the locking of the new secondary here
9219       # unless DRBD8.AddChildren is changed to work in parallel;
9220       # currently it doesn't since parallel invocations of
9221       # FindUnusedMinor will conflict
9222       self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9223       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9224     else:
9225       self.needed_locks[locking.LEVEL_NODE] = []
9226       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9227
9228       if self.op.iallocator is not None:
9229         # iallocator will select a new node in the same group
9230         self.needed_locks[locking.LEVEL_NODEGROUP] = []
9231
9232     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9233                                    self.op.iallocator, self.op.remote_node,
9234                                    self.op.disks, False, self.op.early_release)
9235
9236     self.tasklets = [self.replacer]
9237
9238   def DeclareLocks(self, level):
9239     if level == locking.LEVEL_NODEGROUP:
9240       assert self.op.remote_node is None
9241       assert self.op.iallocator is not None
9242       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9243
9244       self.share_locks[locking.LEVEL_NODEGROUP] = 1
9245       self.needed_locks[locking.LEVEL_NODEGROUP] = \
9246         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9247
9248     elif level == locking.LEVEL_NODE:
9249       if self.op.iallocator is not None:
9250         assert self.op.remote_node is None
9251         assert not self.needed_locks[locking.LEVEL_NODE]
9252
9253         # Lock member nodes of all locked groups
9254         self.needed_locks[locking.LEVEL_NODE] = [node_name
9255           for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9256           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9257       else:
9258         self._LockInstancesNodes()
9259
9260   def BuildHooksEnv(self):
9261     """Build hooks env.
9262
9263     This runs on the master, the primary and all the secondaries.
9264
9265     """
9266     instance = self.replacer.instance
9267     env = {
9268       "MODE": self.op.mode,
9269       "NEW_SECONDARY": self.op.remote_node,
9270       "OLD_SECONDARY": instance.secondary_nodes[0],
9271       }
9272     env.update(_BuildInstanceHookEnvByObject(self, instance))
9273     return env
9274
9275   def BuildHooksNodes(self):
9276     """Build hooks nodes.
9277
9278     """
9279     instance = self.replacer.instance
9280     nl = [
9281       self.cfg.GetMasterNode(),
9282       instance.primary_node,
9283       ]
9284     if self.op.remote_node is not None:
9285       nl.append(self.op.remote_node)
9286     return nl, nl
9287
9288   def CheckPrereq(self):
9289     """Check prerequisites.
9290
9291     """
9292     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9293             self.op.iallocator is None)
9294
9295     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9296     if owned_groups:
9297       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9298
9299     return LogicalUnit.CheckPrereq(self)
9300
9301
9302 class TLReplaceDisks(Tasklet):
9303   """Replaces disks for an instance.
9304
9305   Note: Locking is not within the scope of this class.
9306
9307   """
9308   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9309                disks, delay_iallocator, early_release):
9310     """Initializes this class.
9311
9312     """
9313     Tasklet.__init__(self, lu)
9314
9315     # Parameters
9316     self.instance_name = instance_name
9317     self.mode = mode
9318     self.iallocator_name = iallocator_name
9319     self.remote_node = remote_node
9320     self.disks = disks
9321     self.delay_iallocator = delay_iallocator
9322     self.early_release = early_release
9323
9324     # Runtime data
9325     self.instance = None
9326     self.new_node = None
9327     self.target_node = None
9328     self.other_node = None
9329     self.remote_node_info = None
9330     self.node_secondary_ip = None
9331
9332   @staticmethod
9333   def CheckArguments(mode, remote_node, iallocator):
9334     """Helper function for users of this class.
9335
9336     """
9337     # check for valid parameter combination
9338     if mode == constants.REPLACE_DISK_CHG:
9339       if remote_node is None and iallocator is None:
9340         raise errors.OpPrereqError("When changing the secondary either an"
9341                                    " iallocator script must be used or the"
9342                                    " new node given", errors.ECODE_INVAL)
9343
9344       if remote_node is not None and iallocator is not None:
9345         raise errors.OpPrereqError("Give either the iallocator or the new"
9346                                    " secondary, not both", errors.ECODE_INVAL)
9347
9348     elif remote_node is not None or iallocator is not None:
9349       # Not replacing the secondary
9350       raise errors.OpPrereqError("The iallocator and new node options can"
9351                                  " only be used when changing the"
9352                                  " secondary node", errors.ECODE_INVAL)
9353
9354   @staticmethod
9355   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9356     """Compute a new secondary node using an IAllocator.
9357
9358     """
9359     ial = IAllocator(lu.cfg, lu.rpc,
9360                      mode=constants.IALLOCATOR_MODE_RELOC,
9361                      name=instance_name,
9362                      relocate_from=list(relocate_from))
9363
9364     ial.Run(iallocator_name)
9365
9366     if not ial.success:
9367       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9368                                  " %s" % (iallocator_name, ial.info),
9369                                  errors.ECODE_NORES)
9370
9371     if len(ial.result) != ial.required_nodes:
9372       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9373                                  " of nodes (%s), required %s" %
9374                                  (iallocator_name,
9375                                   len(ial.result), ial.required_nodes),
9376                                  errors.ECODE_FAULT)
9377
9378     remote_node_name = ial.result[0]
9379
9380     lu.LogInfo("Selected new secondary for instance '%s': %s",
9381                instance_name, remote_node_name)
9382
9383     return remote_node_name
9384
9385   def _FindFaultyDisks(self, node_name):
9386     """Wrapper for L{_FindFaultyInstanceDisks}.
9387
9388     """
9389     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9390                                     node_name, True)
9391
9392   def _CheckDisksActivated(self, instance):
9393     """Checks if the instance disks are activated.
9394
9395     @param instance: The instance to check disks
9396     @return: True if they are activated, False otherwise
9397
9398     """
9399     nodes = instance.all_nodes
9400
9401     for idx, dev in enumerate(instance.disks):
9402       for node in nodes:
9403         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9404         self.cfg.SetDiskID(dev, node)
9405
9406         result = self.rpc.call_blockdev_find(node, dev)
9407
9408         if result.offline:
9409           continue
9410         elif result.fail_msg or not result.payload:
9411           return False
9412
9413     return True
9414
9415   def CheckPrereq(self):
9416     """Check prerequisites.
9417
9418     This checks that the instance is in the cluster.
9419
9420     """
9421     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9422     assert instance is not None, \
9423       "Cannot retrieve locked instance %s" % self.instance_name
9424
9425     if instance.disk_template != constants.DT_DRBD8:
9426       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9427                                  " instances", errors.ECODE_INVAL)
9428
9429     if len(instance.secondary_nodes) != 1:
9430       raise errors.OpPrereqError("The instance has a strange layout,"
9431                                  " expected one secondary but found %d" %
9432                                  len(instance.secondary_nodes),
9433                                  errors.ECODE_FAULT)
9434
9435     if not self.delay_iallocator:
9436       self._CheckPrereq2()
9437
9438   def _CheckPrereq2(self):
9439     """Check prerequisites, second part.
9440
9441     This function should always be part of CheckPrereq. It was separated and is
9442     now called from Exec because during node evacuation iallocator was only
9443     called with an unmodified cluster model, not taking planned changes into
9444     account.
9445
9446     """
9447     instance = self.instance
9448     secondary_node = instance.secondary_nodes[0]
9449
9450     if self.iallocator_name is None:
9451       remote_node = self.remote_node
9452     else:
9453       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9454                                        instance.name, instance.secondary_nodes)
9455
9456     if remote_node is None:
9457       self.remote_node_info = None
9458     else:
9459       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9460              "Remote node '%s' is not locked" % remote_node
9461
9462       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9463       assert self.remote_node_info is not None, \
9464         "Cannot retrieve locked node %s" % remote_node
9465
9466     if remote_node == self.instance.primary_node:
9467       raise errors.OpPrereqError("The specified node is the primary node of"
9468                                  " the instance", errors.ECODE_INVAL)
9469
9470     if remote_node == secondary_node:
9471       raise errors.OpPrereqError("The specified node is already the"
9472                                  " secondary node of the instance",
9473                                  errors.ECODE_INVAL)
9474
9475     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9476                                     constants.REPLACE_DISK_CHG):
9477       raise errors.OpPrereqError("Cannot specify disks to be replaced",
9478                                  errors.ECODE_INVAL)
9479
9480     if self.mode == constants.REPLACE_DISK_AUTO:
9481       if not self._CheckDisksActivated(instance):
9482         raise errors.OpPrereqError("Please run activate-disks on instance %s"
9483                                    " first" % self.instance_name,
9484                                    errors.ECODE_STATE)
9485       faulty_primary = self._FindFaultyDisks(instance.primary_node)
9486       faulty_secondary = self._FindFaultyDisks(secondary_node)
9487
9488       if faulty_primary and faulty_secondary:
9489         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9490                                    " one node and can not be repaired"
9491                                    " automatically" % self.instance_name,
9492                                    errors.ECODE_STATE)
9493
9494       if faulty_primary:
9495         self.disks = faulty_primary
9496         self.target_node = instance.primary_node
9497         self.other_node = secondary_node
9498         check_nodes = [self.target_node, self.other_node]
9499       elif faulty_secondary:
9500         self.disks = faulty_secondary
9501         self.target_node = secondary_node
9502         self.other_node = instance.primary_node
9503         check_nodes = [self.target_node, self.other_node]
9504       else:
9505         self.disks = []
9506         check_nodes = []
9507
9508     else:
9509       # Non-automatic modes
9510       if self.mode == constants.REPLACE_DISK_PRI:
9511         self.target_node = instance.primary_node
9512         self.other_node = secondary_node
9513         check_nodes = [self.target_node, self.other_node]
9514
9515       elif self.mode == constants.REPLACE_DISK_SEC:
9516         self.target_node = secondary_node
9517         self.other_node = instance.primary_node
9518         check_nodes = [self.target_node, self.other_node]
9519
9520       elif self.mode == constants.REPLACE_DISK_CHG:
9521         self.new_node = remote_node
9522         self.other_node = instance.primary_node
9523         self.target_node = secondary_node
9524         check_nodes = [self.new_node, self.other_node]
9525
9526         _CheckNodeNotDrained(self.lu, remote_node)
9527         _CheckNodeVmCapable(self.lu, remote_node)
9528
9529         old_node_info = self.cfg.GetNodeInfo(secondary_node)
9530         assert old_node_info is not None
9531         if old_node_info.offline and not self.early_release:
9532           # doesn't make sense to delay the release
9533           self.early_release = True
9534           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9535                           " early-release mode", secondary_node)
9536
9537       else:
9538         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9539                                      self.mode)
9540
9541       # If not specified all disks should be replaced
9542       if not self.disks:
9543         self.disks = range(len(self.instance.disks))
9544
9545     for node in check_nodes:
9546       _CheckNodeOnline(self.lu, node)
9547
9548     touched_nodes = frozenset(node_name for node_name in [self.new_node,
9549                                                           self.other_node,
9550                                                           self.target_node]
9551                               if node_name is not None)
9552
9553     # Release unneeded node locks
9554     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9555
9556     # Release any owned node group
9557     if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9558       _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9559
9560     # Check whether disks are valid
9561     for disk_idx in self.disks:
9562       instance.FindDisk(disk_idx)
9563
9564     # Get secondary node IP addresses
9565     self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9566                                   in self.cfg.GetMultiNodeInfo(touched_nodes))
9567
9568   def Exec(self, feedback_fn):
9569     """Execute disk replacement.
9570
9571     This dispatches the disk replacement to the appropriate handler.
9572
9573     """
9574     if self.delay_iallocator:
9575       self._CheckPrereq2()
9576
9577     if __debug__:
9578       # Verify owned locks before starting operation
9579       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9580       assert set(owned_nodes) == set(self.node_secondary_ip), \
9581           ("Incorrect node locks, owning %s, expected %s" %
9582            (owned_nodes, self.node_secondary_ip.keys()))
9583
9584       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9585       assert list(owned_instances) == [self.instance_name], \
9586           "Instance '%s' not locked" % self.instance_name
9587
9588       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9589           "Should not own any node group lock at this point"
9590
9591     if not self.disks:
9592       feedback_fn("No disks need replacement")
9593       return
9594
9595     feedback_fn("Replacing disk(s) %s for %s" %
9596                 (utils.CommaJoin(self.disks), self.instance.name))
9597
9598     activate_disks = (not self.instance.admin_up)
9599
9600     # Activate the instance disks if we're replacing them on a down instance
9601     if activate_disks:
9602       _StartInstanceDisks(self.lu, self.instance, True)
9603
9604     try:
9605       # Should we replace the secondary node?
9606       if self.new_node is not None:
9607         fn = self._ExecDrbd8Secondary
9608       else:
9609         fn = self._ExecDrbd8DiskOnly
9610
9611       result = fn(feedback_fn)
9612     finally:
9613       # Deactivate the instance disks if we're replacing them on a
9614       # down instance
9615       if activate_disks:
9616         _SafeShutdownInstanceDisks(self.lu, self.instance)
9617
9618     if __debug__:
9619       # Verify owned locks
9620       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9621       nodes = frozenset(self.node_secondary_ip)
9622       assert ((self.early_release and not owned_nodes) or
9623               (not self.early_release and not (set(owned_nodes) - nodes))), \
9624         ("Not owning the correct locks, early_release=%s, owned=%r,"
9625          " nodes=%r" % (self.early_release, owned_nodes, nodes))
9626
9627     return result
9628
9629   def _CheckVolumeGroup(self, nodes):
9630     self.lu.LogInfo("Checking volume groups")
9631
9632     vgname = self.cfg.GetVGName()
9633
9634     # Make sure volume group exists on all involved nodes
9635     results = self.rpc.call_vg_list(nodes)
9636     if not results:
9637       raise errors.OpExecError("Can't list volume groups on the nodes")
9638
9639     for node in nodes:
9640       res = results[node]
9641       res.Raise("Error checking node %s" % node)
9642       if vgname not in res.payload:
9643         raise errors.OpExecError("Volume group '%s' not found on node %s" %
9644                                  (vgname, node))
9645
9646   def _CheckDisksExistence(self, nodes):
9647     # Check disk existence
9648     for idx, dev in enumerate(self.instance.disks):
9649       if idx not in self.disks:
9650         continue
9651
9652       for node in nodes:
9653         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9654         self.cfg.SetDiskID(dev, node)
9655
9656         result = self.rpc.call_blockdev_find(node, dev)
9657
9658         msg = result.fail_msg
9659         if msg or not result.payload:
9660           if not msg:
9661             msg = "disk not found"
9662           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9663                                    (idx, node, msg))
9664
9665   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9666     for idx, dev in enumerate(self.instance.disks):
9667       if idx not in self.disks:
9668         continue
9669
9670       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9671                       (idx, node_name))
9672
9673       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9674                                    ldisk=ldisk):
9675         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9676                                  " replace disks for instance %s" %
9677                                  (node_name, self.instance.name))
9678
9679   def _CreateNewStorage(self, node_name):
9680     """Create new storage on the primary or secondary node.
9681
9682     This is only used for same-node replaces, not for changing the
9683     secondary node, hence we don't want to modify the existing disk.
9684
9685     """
9686     iv_names = {}
9687
9688     for idx, dev in enumerate(self.instance.disks):
9689       if idx not in self.disks:
9690         continue
9691
9692       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9693
9694       self.cfg.SetDiskID(dev, node_name)
9695
9696       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9697       names = _GenerateUniqueNames(self.lu, lv_names)
9698
9699       vg_data = dev.children[0].logical_id[0]
9700       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9701                              logical_id=(vg_data, names[0]))
9702       vg_meta = dev.children[1].logical_id[0]
9703       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9704                              logical_id=(vg_meta, names[1]))
9705
9706       new_lvs = [lv_data, lv_meta]
9707       old_lvs = [child.Copy() for child in dev.children]
9708       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9709
9710       # we pass force_create=True to force the LVM creation
9711       for new_lv in new_lvs:
9712         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9713                         _GetInstanceInfoText(self.instance), False)
9714
9715     return iv_names
9716
9717   def _CheckDevices(self, node_name, iv_names):
9718     for name, (dev, _, _) in iv_names.iteritems():
9719       self.cfg.SetDiskID(dev, node_name)
9720
9721       result = self.rpc.call_blockdev_find(node_name, dev)
9722
9723       msg = result.fail_msg
9724       if msg or not result.payload:
9725         if not msg:
9726           msg = "disk not found"
9727         raise errors.OpExecError("Can't find DRBD device %s: %s" %
9728                                  (name, msg))
9729
9730       if result.payload.is_degraded:
9731         raise errors.OpExecError("DRBD device %s is degraded!" % name)
9732
9733   def _RemoveOldStorage(self, node_name, iv_names):
9734     for name, (_, old_lvs, _) in iv_names.iteritems():
9735       self.lu.LogInfo("Remove logical volumes for %s" % name)
9736
9737       for lv in old_lvs:
9738         self.cfg.SetDiskID(lv, node_name)
9739
9740         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9741         if msg:
9742           self.lu.LogWarning("Can't remove old LV: %s" % msg,
9743                              hint="remove unused LVs manually")
9744
9745   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9746     """Replace a disk on the primary or secondary for DRBD 8.
9747
9748     The algorithm for replace is quite complicated:
9749
9750       1. for each disk to be replaced:
9751
9752         1. create new LVs on the target node with unique names
9753         1. detach old LVs from the drbd device
9754         1. rename old LVs to name_replaced.<time_t>
9755         1. rename new LVs to old LVs
9756         1. attach the new LVs (with the old names now) to the drbd device
9757
9758       1. wait for sync across all devices
9759
9760       1. for each modified disk:
9761
9762         1. remove old LVs (which have the name name_replaces.<time_t>)
9763
9764     Failures are not very well handled.
9765
9766     """
9767     steps_total = 6
9768
9769     # Step: check device activation
9770     self.lu.LogStep(1, steps_total, "Check device existence")
9771     self._CheckDisksExistence([self.other_node, self.target_node])
9772     self._CheckVolumeGroup([self.target_node, self.other_node])
9773
9774     # Step: check other node consistency
9775     self.lu.LogStep(2, steps_total, "Check peer consistency")
9776     self._CheckDisksConsistency(self.other_node,
9777                                 self.other_node == self.instance.primary_node,
9778                                 False)
9779
9780     # Step: create new storage
9781     self.lu.LogStep(3, steps_total, "Allocate new storage")
9782     iv_names = self._CreateNewStorage(self.target_node)
9783
9784     # Step: for each lv, detach+rename*2+attach
9785     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9786     for dev, old_lvs, new_lvs in iv_names.itervalues():
9787       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9788
9789       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9790                                                      old_lvs)
9791       result.Raise("Can't detach drbd from local storage on node"
9792                    " %s for device %s" % (self.target_node, dev.iv_name))
9793       #dev.children = []
9794       #cfg.Update(instance)
9795
9796       # ok, we created the new LVs, so now we know we have the needed
9797       # storage; as such, we proceed on the target node to rename
9798       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9799       # using the assumption that logical_id == physical_id (which in
9800       # turn is the unique_id on that node)
9801
9802       # FIXME(iustin): use a better name for the replaced LVs
9803       temp_suffix = int(time.time())
9804       ren_fn = lambda d, suff: (d.physical_id[0],
9805                                 d.physical_id[1] + "_replaced-%s" % suff)
9806
9807       # Build the rename list based on what LVs exist on the node
9808       rename_old_to_new = []
9809       for to_ren in old_lvs:
9810         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9811         if not result.fail_msg and result.payload:
9812           # device exists
9813           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9814
9815       self.lu.LogInfo("Renaming the old LVs on the target node")
9816       result = self.rpc.call_blockdev_rename(self.target_node,
9817                                              rename_old_to_new)
9818       result.Raise("Can't rename old LVs on node %s" % self.target_node)
9819
9820       # Now we rename the new LVs to the old LVs
9821       self.lu.LogInfo("Renaming the new LVs on the target node")
9822       rename_new_to_old = [(new, old.physical_id)
9823                            for old, new in zip(old_lvs, new_lvs)]
9824       result = self.rpc.call_blockdev_rename(self.target_node,
9825                                              rename_new_to_old)
9826       result.Raise("Can't rename new LVs on node %s" % self.target_node)
9827
9828       # Intermediate steps of in memory modifications
9829       for old, new in zip(old_lvs, new_lvs):
9830         new.logical_id = old.logical_id
9831         self.cfg.SetDiskID(new, self.target_node)
9832
9833       # We need to modify old_lvs so that removal later removes the
9834       # right LVs, not the newly added ones; note that old_lvs is a
9835       # copy here
9836       for disk in old_lvs:
9837         disk.logical_id = ren_fn(disk, temp_suffix)
9838         self.cfg.SetDiskID(disk, self.target_node)
9839
9840       # Now that the new lvs have the old name, we can add them to the device
9841       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9842       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9843                                                   new_lvs)
9844       msg = result.fail_msg
9845       if msg:
9846         for new_lv in new_lvs:
9847           msg2 = self.rpc.call_blockdev_remove(self.target_node,
9848                                                new_lv).fail_msg
9849           if msg2:
9850             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9851                                hint=("cleanup manually the unused logical"
9852                                      "volumes"))
9853         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9854
9855     cstep = 5
9856     if self.early_release:
9857       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9858       cstep += 1
9859       self._RemoveOldStorage(self.target_node, iv_names)
9860       # WARNING: we release both node locks here, do not do other RPCs
9861       # than WaitForSync to the primary node
9862       _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9863                     names=[self.target_node, self.other_node])
9864
9865     # Wait for sync
9866     # This can fail as the old devices are degraded and _WaitForSync
9867     # does a combined result over all disks, so we don't check its return value
9868     self.lu.LogStep(cstep, steps_total, "Sync devices")
9869     cstep += 1
9870     _WaitForSync(self.lu, self.instance)
9871
9872     # Check all devices manually
9873     self._CheckDevices(self.instance.primary_node, iv_names)
9874
9875     # Step: remove old storage
9876     if not self.early_release:
9877       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9878       cstep += 1
9879       self._RemoveOldStorage(self.target_node, iv_names)
9880
9881   def _ExecDrbd8Secondary(self, feedback_fn):
9882     """Replace the secondary node for DRBD 8.
9883
9884     The algorithm for replace is quite complicated:
9885       - for all disks of the instance:
9886         - create new LVs on the new node with same names
9887         - shutdown the drbd device on the old secondary
9888         - disconnect the drbd network on the primary
9889         - create the drbd device on the new secondary
9890         - network attach the drbd on the primary, using an artifice:
9891           the drbd code for Attach() will connect to the network if it
9892           finds a device which is connected to the good local disks but
9893           not network enabled
9894       - wait for sync across all devices
9895       - remove all disks from the old secondary
9896
9897     Failures are not very well handled.
9898
9899     """
9900     steps_total = 6
9901
9902     pnode = self.instance.primary_node
9903
9904     # Step: check device activation
9905     self.lu.LogStep(1, steps_total, "Check device existence")
9906     self._CheckDisksExistence([self.instance.primary_node])
9907     self._CheckVolumeGroup([self.instance.primary_node])
9908
9909     # Step: check other node consistency
9910     self.lu.LogStep(2, steps_total, "Check peer consistency")
9911     self._CheckDisksConsistency(self.instance.primary_node, True, True)
9912
9913     # Step: create new storage
9914     self.lu.LogStep(3, steps_total, "Allocate new storage")
9915     for idx, dev in enumerate(self.instance.disks):
9916       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9917                       (self.new_node, idx))
9918       # we pass force_create=True to force LVM creation
9919       for new_lv in dev.children:
9920         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9921                         _GetInstanceInfoText(self.instance), False)
9922
9923     # Step 4: dbrd minors and drbd setups changes
9924     # after this, we must manually remove the drbd minors on both the
9925     # error and the success paths
9926     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9927     minors = self.cfg.AllocateDRBDMinor([self.new_node
9928                                          for dev in self.instance.disks],
9929                                         self.instance.name)
9930     logging.debug("Allocated minors %r", minors)
9931
9932     iv_names = {}
9933     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9934       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9935                       (self.new_node, idx))
9936       # create new devices on new_node; note that we create two IDs:
9937       # one without port, so the drbd will be activated without
9938       # networking information on the new node at this stage, and one
9939       # with network, for the latter activation in step 4
9940       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9941       if self.instance.primary_node == o_node1:
9942         p_minor = o_minor1
9943       else:
9944         assert self.instance.primary_node == o_node2, "Three-node instance?"
9945         p_minor = o_minor2
9946
9947       new_alone_id = (self.instance.primary_node, self.new_node, None,
9948                       p_minor, new_minor, o_secret)
9949       new_net_id = (self.instance.primary_node, self.new_node, o_port,
9950                     p_minor, new_minor, o_secret)
9951
9952       iv_names[idx] = (dev, dev.children, new_net_id)
9953       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9954                     new_net_id)
9955       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9956                               logical_id=new_alone_id,
9957                               children=dev.children,
9958                               size=dev.size)
9959       try:
9960         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9961                               _GetInstanceInfoText(self.instance), False)
9962       except errors.GenericError:
9963         self.cfg.ReleaseDRBDMinors(self.instance.name)
9964         raise
9965
9966     # We have new devices, shutdown the drbd on the old secondary
9967     for idx, dev in enumerate(self.instance.disks):
9968       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9969       self.cfg.SetDiskID(dev, self.target_node)
9970       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9971       if msg:
9972         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9973                            "node: %s" % (idx, msg),
9974                            hint=("Please cleanup this device manually as"
9975                                  " soon as possible"))
9976
9977     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9978     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
9979                                                self.instance.disks)[pnode]
9980
9981     msg = result.fail_msg
9982     if msg:
9983       # detaches didn't succeed (unlikely)
9984       self.cfg.ReleaseDRBDMinors(self.instance.name)
9985       raise errors.OpExecError("Can't detach the disks from the network on"
9986                                " old node: %s" % (msg,))
9987
9988     # if we managed to detach at least one, we update all the disks of
9989     # the instance to point to the new secondary
9990     self.lu.LogInfo("Updating instance configuration")
9991     for dev, _, new_logical_id in iv_names.itervalues():
9992       dev.logical_id = new_logical_id
9993       self.cfg.SetDiskID(dev, self.instance.primary_node)
9994
9995     self.cfg.Update(self.instance, feedback_fn)
9996
9997     # and now perform the drbd attach
9998     self.lu.LogInfo("Attaching primary drbds to new secondary"
9999                     " (standalone => connected)")
10000     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10001                                             self.new_node],
10002                                            self.node_secondary_ip,
10003                                            self.instance.disks,
10004                                            self.instance.name,
10005                                            False)
10006     for to_node, to_result in result.items():
10007       msg = to_result.fail_msg
10008       if msg:
10009         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10010                            to_node, msg,
10011                            hint=("please do a gnt-instance info to see the"
10012                                  " status of disks"))
10013     cstep = 5
10014     if self.early_release:
10015       self.lu.LogStep(cstep, steps_total, "Removing old storage")
10016       cstep += 1
10017       self._RemoveOldStorage(self.target_node, iv_names)
10018       # WARNING: we release all node locks here, do not do other RPCs
10019       # than WaitForSync to the primary node
10020       _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10021                     names=[self.instance.primary_node,
10022                            self.target_node,
10023                            self.new_node])
10024
10025     # Wait for sync
10026     # This can fail as the old devices are degraded and _WaitForSync
10027     # does a combined result over all disks, so we don't check its return value
10028     self.lu.LogStep(cstep, steps_total, "Sync devices")
10029     cstep += 1
10030     _WaitForSync(self.lu, self.instance)
10031
10032     # Check all devices manually
10033     self._CheckDevices(self.instance.primary_node, iv_names)
10034
10035     # Step: remove old storage
10036     if not self.early_release:
10037       self.lu.LogStep(cstep, steps_total, "Removing old storage")
10038       self._RemoveOldStorage(self.target_node, iv_names)
10039
10040
10041 class LURepairNodeStorage(NoHooksLU):
10042   """Repairs the volume group on a node.
10043
10044   """
10045   REQ_BGL = False
10046
10047   def CheckArguments(self):
10048     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10049
10050     storage_type = self.op.storage_type
10051
10052     if (constants.SO_FIX_CONSISTENCY not in
10053         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10054       raise errors.OpPrereqError("Storage units of type '%s' can not be"
10055                                  " repaired" % storage_type,
10056                                  errors.ECODE_INVAL)
10057
10058   def ExpandNames(self):
10059     self.needed_locks = {
10060       locking.LEVEL_NODE: [self.op.node_name],
10061       }
10062
10063   def _CheckFaultyDisks(self, instance, node_name):
10064     """Ensure faulty disks abort the opcode or at least warn."""
10065     try:
10066       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10067                                   node_name, True):
10068         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10069                                    " node '%s'" % (instance.name, node_name),
10070                                    errors.ECODE_STATE)
10071     except errors.OpPrereqError, err:
10072       if self.op.ignore_consistency:
10073         self.proc.LogWarning(str(err.args[0]))
10074       else:
10075         raise
10076
10077   def CheckPrereq(self):
10078     """Check prerequisites.
10079
10080     """
10081     # Check whether any instance on this node has faulty disks
10082     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10083       if not inst.admin_up:
10084         continue
10085       check_nodes = set(inst.all_nodes)
10086       check_nodes.discard(self.op.node_name)
10087       for inst_node_name in check_nodes:
10088         self._CheckFaultyDisks(inst, inst_node_name)
10089
10090   def Exec(self, feedback_fn):
10091     feedback_fn("Repairing storage unit '%s' on %s ..." %
10092                 (self.op.name, self.op.node_name))
10093
10094     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10095     result = self.rpc.call_storage_execute(self.op.node_name,
10096                                            self.op.storage_type, st_args,
10097                                            self.op.name,
10098                                            constants.SO_FIX_CONSISTENCY)
10099     result.Raise("Failed to repair storage unit '%s' on %s" %
10100                  (self.op.name, self.op.node_name))
10101
10102
10103 class LUNodeEvacuate(NoHooksLU):
10104   """Evacuates instances off a list of nodes.
10105
10106   """
10107   REQ_BGL = False
10108
10109   def CheckArguments(self):
10110     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10111
10112   def ExpandNames(self):
10113     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10114
10115     if self.op.remote_node is not None:
10116       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10117       assert self.op.remote_node
10118
10119       if self.op.remote_node == self.op.node_name:
10120         raise errors.OpPrereqError("Can not use evacuated node as a new"
10121                                    " secondary node", errors.ECODE_INVAL)
10122
10123       if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10124         raise errors.OpPrereqError("Without the use of an iallocator only"
10125                                    " secondary instances can be evacuated",
10126                                    errors.ECODE_INVAL)
10127
10128     # Declare locks
10129     self.share_locks = _ShareAll()
10130     self.needed_locks = {
10131       locking.LEVEL_INSTANCE: [],
10132       locking.LEVEL_NODEGROUP: [],
10133       locking.LEVEL_NODE: [],
10134       }
10135
10136     # Determine nodes (via group) optimistically, needs verification once locks
10137     # have been acquired
10138     self.lock_nodes = self._DetermineNodes()
10139
10140   def _DetermineNodes(self):
10141     """Gets the list of nodes to operate on.
10142
10143     """
10144     if self.op.remote_node is None:
10145       # Iallocator will choose any node(s) in the same group
10146       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10147     else:
10148       group_nodes = frozenset([self.op.remote_node])
10149
10150     # Determine nodes to be locked
10151     return set([self.op.node_name]) | group_nodes
10152
10153   def _DetermineInstances(self):
10154     """Builds list of instances to operate on.
10155
10156     """
10157     assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10158
10159     if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10160       # Primary instances only
10161       inst_fn = _GetNodePrimaryInstances
10162       assert self.op.remote_node is None, \
10163         "Evacuating primary instances requires iallocator"
10164     elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10165       # Secondary instances only
10166       inst_fn = _GetNodeSecondaryInstances
10167     else:
10168       # All instances
10169       assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10170       inst_fn = _GetNodeInstances
10171       # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10172       # per instance
10173       raise errors.OpPrereqError("Due to an issue with the iallocator"
10174                                  " interface it is not possible to evacuate"
10175                                  " all instances at once; specify explicitly"
10176                                  " whether to evacuate primary or secondary"
10177                                  " instances",
10178                                  errors.ECODE_INVAL)
10179
10180     return inst_fn(self.cfg, self.op.node_name)
10181
10182   def DeclareLocks(self, level):
10183     if level == locking.LEVEL_INSTANCE:
10184       # Lock instances optimistically, needs verification once node and group
10185       # locks have been acquired
10186       self.needed_locks[locking.LEVEL_INSTANCE] = \
10187         set(i.name for i in self._DetermineInstances())
10188
10189     elif level == locking.LEVEL_NODEGROUP:
10190       # Lock node groups for all potential target nodes optimistically, needs
10191       # verification once nodes have been acquired
10192       self.needed_locks[locking.LEVEL_NODEGROUP] = \
10193         self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10194
10195     elif level == locking.LEVEL_NODE:
10196       self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10197
10198   def CheckPrereq(self):
10199     # Verify locks
10200     owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10201     owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10202     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10203
10204     need_nodes = self._DetermineNodes()
10205
10206     if not owned_nodes.issuperset(need_nodes):
10207       raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
10208                                  " locks were acquired, current nodes are"
10209                                  " are '%s', used to be '%s'; retry the"
10210                                  " operation" %
10211                                  (self.op.node_name,
10212                                   utils.CommaJoin(need_nodes),
10213                                   utils.CommaJoin(owned_nodes)),
10214                                  errors.ECODE_STATE)
10215
10216     wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10217     if owned_groups != wanted_groups:
10218       raise errors.OpExecError("Node groups changed since locks were acquired,"
10219                                " current groups are '%s', used to be '%s';"
10220                                " retry the operation" %
10221                                (utils.CommaJoin(wanted_groups),
10222                                 utils.CommaJoin(owned_groups)))
10223
10224     # Determine affected instances
10225     self.instances = self._DetermineInstances()
10226     self.instance_names = [i.name for i in self.instances]
10227
10228     if set(self.instance_names) != owned_instances:
10229       raise errors.OpExecError("Instances on node '%s' changed since locks"
10230                                " were acquired, current instances are '%s',"
10231                                " used to be '%s'; retry the operation" %
10232                                (self.op.node_name,
10233                                 utils.CommaJoin(self.instance_names),
10234                                 utils.CommaJoin(owned_instances)))
10235
10236     if self.instance_names:
10237       self.LogInfo("Evacuating instances from node '%s': %s",
10238                    self.op.node_name,
10239                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
10240     else:
10241       self.LogInfo("No instances to evacuate from node '%s'",
10242                    self.op.node_name)
10243
10244     if self.op.remote_node is not None:
10245       for i in self.instances:
10246         if i.primary_node == self.op.remote_node:
10247           raise errors.OpPrereqError("Node %s is the primary node of"
10248                                      " instance %s, cannot use it as"
10249                                      " secondary" %
10250                                      (self.op.remote_node, i.name),
10251                                      errors.ECODE_INVAL)
10252
10253   def Exec(self, feedback_fn):
10254     assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10255
10256     if not self.instance_names:
10257       # No instances to evacuate
10258       jobs = []
10259
10260     elif self.op.iallocator is not None:
10261       # TODO: Implement relocation to other group
10262       ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10263                        evac_mode=self.op.mode,
10264                        instances=list(self.instance_names))
10265
10266       ial.Run(self.op.iallocator)
10267
10268       if not ial.success:
10269         raise errors.OpPrereqError("Can't compute node evacuation using"
10270                                    " iallocator '%s': %s" %
10271                                    (self.op.iallocator, ial.info),
10272                                    errors.ECODE_NORES)
10273
10274       jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10275
10276     elif self.op.remote_node is not None:
10277       assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10278       jobs = [
10279         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10280                                         remote_node=self.op.remote_node,
10281                                         disks=[],
10282                                         mode=constants.REPLACE_DISK_CHG,
10283                                         early_release=self.op.early_release)]
10284         for instance_name in self.instance_names
10285         ]
10286
10287     else:
10288       raise errors.ProgrammerError("No iallocator or remote node")
10289
10290     return ResultWithJobs(jobs)
10291
10292
10293 def _SetOpEarlyRelease(early_release, op):
10294   """Sets C{early_release} flag on opcodes if available.
10295
10296   """
10297   try:
10298     op.early_release = early_release
10299   except AttributeError:
10300     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10301
10302   return op
10303
10304
10305 def _NodeEvacDest(use_nodes, group, nodes):
10306   """Returns group or nodes depending on caller's choice.
10307
10308   """
10309   if use_nodes:
10310     return utils.CommaJoin(nodes)
10311   else:
10312     return group
10313
10314
10315 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10316   """Unpacks the result of change-group and node-evacuate iallocator requests.
10317
10318   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10319   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10320
10321   @type lu: L{LogicalUnit}
10322   @param lu: Logical unit instance
10323   @type alloc_result: tuple/list
10324   @param alloc_result: Result from iallocator
10325   @type early_release: bool
10326   @param early_release: Whether to release locks early if possible
10327   @type use_nodes: bool
10328   @param use_nodes: Whether to display node names instead of groups
10329
10330   """
10331   (moved, failed, jobs) = alloc_result
10332
10333   if failed:
10334     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
10335                                  for (name, reason) in failed)
10336     lu.LogWarning("Unable to evacuate instances %s", failreason)
10337     raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
10338
10339   if moved:
10340     lu.LogInfo("Instances to be moved: %s",
10341                utils.CommaJoin("%s (to %s)" %
10342                                (name, _NodeEvacDest(use_nodes, group, nodes))
10343                                for (name, group, nodes) in moved))
10344
10345   return [map(compat.partial(_SetOpEarlyRelease, early_release),
10346               map(opcodes.OpCode.LoadOpCode, ops))
10347           for ops in jobs]
10348
10349
10350 class LUInstanceGrowDisk(LogicalUnit):
10351   """Grow a disk of an instance.
10352
10353   """
10354   HPATH = "disk-grow"
10355   HTYPE = constants.HTYPE_INSTANCE
10356   REQ_BGL = False
10357
10358   def ExpandNames(self):
10359     self._ExpandAndLockInstance()
10360     self.needed_locks[locking.LEVEL_NODE] = []
10361     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10362
10363   def DeclareLocks(self, level):
10364     if level == locking.LEVEL_NODE:
10365       self._LockInstancesNodes()
10366
10367   def BuildHooksEnv(self):
10368     """Build hooks env.
10369
10370     This runs on the master, the primary and all the secondaries.
10371
10372     """
10373     env = {
10374       "DISK": self.op.disk,
10375       "AMOUNT": self.op.amount,
10376       }
10377     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10378     return env
10379
10380   def BuildHooksNodes(self):
10381     """Build hooks nodes.
10382
10383     """
10384     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10385     return (nl, nl)
10386
10387   def CheckPrereq(self):
10388     """Check prerequisites.
10389
10390     This checks that the instance is in the cluster.
10391
10392     """
10393     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10394     assert instance is not None, \
10395       "Cannot retrieve locked instance %s" % self.op.instance_name
10396     nodenames = list(instance.all_nodes)
10397     for node in nodenames:
10398       _CheckNodeOnline(self, node)
10399
10400     self.instance = instance
10401
10402     if instance.disk_template not in constants.DTS_GROWABLE:
10403       raise errors.OpPrereqError("Instance's disk layout does not support"
10404                                  " growing", errors.ECODE_INVAL)
10405
10406     self.disk = instance.FindDisk(self.op.disk)
10407
10408     if instance.disk_template not in (constants.DT_FILE,
10409                                       constants.DT_SHARED_FILE):
10410       # TODO: check the free disk space for file, when that feature will be
10411       # supported
10412       _CheckNodesFreeDiskPerVG(self, nodenames,
10413                                self.disk.ComputeGrowth(self.op.amount))
10414
10415   def Exec(self, feedback_fn):
10416     """Execute disk grow.
10417
10418     """
10419     instance = self.instance
10420     disk = self.disk
10421
10422     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10423     if not disks_ok:
10424       raise errors.OpExecError("Cannot activate block device to grow")
10425
10426     # First run all grow ops in dry-run mode
10427     for node in instance.all_nodes:
10428       self.cfg.SetDiskID(disk, node)
10429       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10430       result.Raise("Grow request failed to node %s" % node)
10431
10432     # We know that (as far as we can test) operations across different
10433     # nodes will succeed, time to run it for real
10434     for node in instance.all_nodes:
10435       self.cfg.SetDiskID(disk, node)
10436       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10437       result.Raise("Grow request failed to node %s" % node)
10438
10439       # TODO: Rewrite code to work properly
10440       # DRBD goes into sync mode for a short amount of time after executing the
10441       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10442       # calling "resize" in sync mode fails. Sleeping for a short amount of
10443       # time is a work-around.
10444       time.sleep(5)
10445
10446     disk.RecordGrow(self.op.amount)
10447     self.cfg.Update(instance, feedback_fn)
10448     if self.op.wait_for_sync:
10449       disk_abort = not _WaitForSync(self, instance, disks=[disk])
10450       if disk_abort:
10451         self.proc.LogWarning("Disk sync-ing has not returned a good"
10452                              " status; please check the instance")
10453       if not instance.admin_up:
10454         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10455     elif not instance.admin_up:
10456       self.proc.LogWarning("Not shutting down the disk even if the instance is"
10457                            " not supposed to be running because no wait for"
10458                            " sync mode was requested")
10459
10460
10461 class LUInstanceQueryData(NoHooksLU):
10462   """Query runtime instance data.
10463
10464   """
10465   REQ_BGL = False
10466
10467   def ExpandNames(self):
10468     self.needed_locks = {}
10469
10470     # Use locking if requested or when non-static information is wanted
10471     if not (self.op.static or self.op.use_locking):
10472       self.LogWarning("Non-static data requested, locks need to be acquired")
10473       self.op.use_locking = True
10474
10475     if self.op.instances or not self.op.use_locking:
10476       # Expand instance names right here
10477       self.wanted_names = _GetWantedInstances(self, self.op.instances)
10478     else:
10479       # Will use acquired locks
10480       self.wanted_names = None
10481
10482     if self.op.use_locking:
10483       self.share_locks = _ShareAll()
10484
10485       if self.wanted_names is None:
10486         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10487       else:
10488         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10489
10490       self.needed_locks[locking.LEVEL_NODE] = []
10491       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10492
10493   def DeclareLocks(self, level):
10494     if self.op.use_locking and level == locking.LEVEL_NODE:
10495       self._LockInstancesNodes()
10496
10497   def CheckPrereq(self):
10498     """Check prerequisites.
10499
10500     This only checks the optional instance list against the existing names.
10501
10502     """
10503     if self.wanted_names is None:
10504       assert self.op.use_locking, "Locking was not used"
10505       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10506
10507     self.wanted_instances = \
10508         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10509
10510   def _ComputeBlockdevStatus(self, node, instance_name, dev):
10511     """Returns the status of a block device
10512
10513     """
10514     if self.op.static or not node:
10515       return None
10516
10517     self.cfg.SetDiskID(dev, node)
10518
10519     result = self.rpc.call_blockdev_find(node, dev)
10520     if result.offline:
10521       return None
10522
10523     result.Raise("Can't compute disk status for %s" % instance_name)
10524
10525     status = result.payload
10526     if status is None:
10527       return None
10528
10529     return (status.dev_path, status.major, status.minor,
10530             status.sync_percent, status.estimated_time,
10531             status.is_degraded, status.ldisk_status)
10532
10533   def _ComputeDiskStatus(self, instance, snode, dev):
10534     """Compute block device status.
10535
10536     """
10537     if dev.dev_type in constants.LDS_DRBD:
10538       # we change the snode then (otherwise we use the one passed in)
10539       if dev.logical_id[0] == instance.primary_node:
10540         snode = dev.logical_id[1]
10541       else:
10542         snode = dev.logical_id[0]
10543
10544     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10545                                               instance.name, dev)
10546     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10547
10548     if dev.children:
10549       dev_children = map(compat.partial(self._ComputeDiskStatus,
10550                                         instance, snode),
10551                          dev.children)
10552     else:
10553       dev_children = []
10554
10555     return {
10556       "iv_name": dev.iv_name,
10557       "dev_type": dev.dev_type,
10558       "logical_id": dev.logical_id,
10559       "physical_id": dev.physical_id,
10560       "pstatus": dev_pstatus,
10561       "sstatus": dev_sstatus,
10562       "children": dev_children,
10563       "mode": dev.mode,
10564       "size": dev.size,
10565       }
10566
10567   def Exec(self, feedback_fn):
10568     """Gather and return data"""
10569     result = {}
10570
10571     cluster = self.cfg.GetClusterInfo()
10572
10573     pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10574                                           for i in self.wanted_instances)
10575     for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10576       if self.op.static or pnode.offline:
10577         remote_state = None
10578         if pnode.offline:
10579           self.LogWarning("Primary node %s is marked offline, returning static"
10580                           " information only for instance %s" %
10581                           (pnode.name, instance.name))
10582       else:
10583         remote_info = self.rpc.call_instance_info(instance.primary_node,
10584                                                   instance.name,
10585                                                   instance.hypervisor)
10586         remote_info.Raise("Error checking node %s" % instance.primary_node)
10587         remote_info = remote_info.payload
10588         if remote_info and "state" in remote_info:
10589           remote_state = "up"
10590         else:
10591           remote_state = "down"
10592
10593       if instance.admin_up:
10594         config_state = "up"
10595       else:
10596         config_state = "down"
10597
10598       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10599                   instance.disks)
10600
10601       result[instance.name] = {
10602         "name": instance.name,
10603         "config_state": config_state,
10604         "run_state": remote_state,
10605         "pnode": instance.primary_node,
10606         "snodes": instance.secondary_nodes,
10607         "os": instance.os,
10608         # this happens to be the same format used for hooks
10609         "nics": _NICListToTuple(self, instance.nics),
10610         "disk_template": instance.disk_template,
10611         "disks": disks,
10612         "hypervisor": instance.hypervisor,
10613         "network_port": instance.network_port,
10614         "hv_instance": instance.hvparams,
10615         "hv_actual": cluster.FillHV(instance, skip_globals=True),
10616         "be_instance": instance.beparams,
10617         "be_actual": cluster.FillBE(instance),
10618         "os_instance": instance.osparams,
10619         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10620         "serial_no": instance.serial_no,
10621         "mtime": instance.mtime,
10622         "ctime": instance.ctime,
10623         "uuid": instance.uuid,
10624         }
10625
10626     return result
10627
10628
10629 class LUInstanceSetParams(LogicalUnit):
10630   """Modifies an instances's parameters.
10631
10632   """
10633   HPATH = "instance-modify"
10634   HTYPE = constants.HTYPE_INSTANCE
10635   REQ_BGL = False
10636
10637   def CheckArguments(self):
10638     if not (self.op.nics or self.op.disks or self.op.disk_template or
10639             self.op.hvparams or self.op.beparams or self.op.os_name):
10640       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10641
10642     if self.op.hvparams:
10643       _CheckGlobalHvParams(self.op.hvparams)
10644
10645     # Disk validation
10646     disk_addremove = 0
10647     for disk_op, disk_dict in self.op.disks:
10648       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10649       if disk_op == constants.DDM_REMOVE:
10650         disk_addremove += 1
10651         continue
10652       elif disk_op == constants.DDM_ADD:
10653         disk_addremove += 1
10654       else:
10655         if not isinstance(disk_op, int):
10656           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10657         if not isinstance(disk_dict, dict):
10658           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10659           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10660
10661       if disk_op == constants.DDM_ADD:
10662         mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10663         if mode not in constants.DISK_ACCESS_SET:
10664           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10665                                      errors.ECODE_INVAL)
10666         size = disk_dict.get(constants.IDISK_SIZE, None)
10667         if size is None:
10668           raise errors.OpPrereqError("Required disk parameter size missing",
10669                                      errors.ECODE_INVAL)
10670         try:
10671           size = int(size)
10672         except (TypeError, ValueError), err:
10673           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10674                                      str(err), errors.ECODE_INVAL)
10675         disk_dict[constants.IDISK_SIZE] = size
10676       else:
10677         # modification of disk
10678         if constants.IDISK_SIZE in disk_dict:
10679           raise errors.OpPrereqError("Disk size change not possible, use"
10680                                      " grow-disk", errors.ECODE_INVAL)
10681
10682     if disk_addremove > 1:
10683       raise errors.OpPrereqError("Only one disk add or remove operation"
10684                                  " supported at a time", errors.ECODE_INVAL)
10685
10686     if self.op.disks and self.op.disk_template is not None:
10687       raise errors.OpPrereqError("Disk template conversion and other disk"
10688                                  " changes not supported at the same time",
10689                                  errors.ECODE_INVAL)
10690
10691     if (self.op.disk_template and
10692         self.op.disk_template in constants.DTS_INT_MIRROR and
10693         self.op.remote_node is None):
10694       raise errors.OpPrereqError("Changing the disk template to a mirrored"
10695                                  " one requires specifying a secondary node",
10696                                  errors.ECODE_INVAL)
10697
10698     # NIC validation
10699     nic_addremove = 0
10700     for nic_op, nic_dict in self.op.nics:
10701       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10702       if nic_op == constants.DDM_REMOVE:
10703         nic_addremove += 1
10704         continue
10705       elif nic_op == constants.DDM_ADD:
10706         nic_addremove += 1
10707       else:
10708         if not isinstance(nic_op, int):
10709           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10710         if not isinstance(nic_dict, dict):
10711           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10712           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10713
10714       # nic_dict should be a dict
10715       nic_ip = nic_dict.get(constants.INIC_IP, None)
10716       if nic_ip is not None:
10717         if nic_ip.lower() == constants.VALUE_NONE:
10718           nic_dict[constants.INIC_IP] = None
10719         else:
10720           if not netutils.IPAddress.IsValid(nic_ip):
10721             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10722                                        errors.ECODE_INVAL)
10723
10724       nic_bridge = nic_dict.get("bridge", None)
10725       nic_link = nic_dict.get(constants.INIC_LINK, None)
10726       if nic_bridge and nic_link:
10727         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10728                                    " at the same time", errors.ECODE_INVAL)
10729       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10730         nic_dict["bridge"] = None
10731       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10732         nic_dict[constants.INIC_LINK] = None
10733
10734       if nic_op == constants.DDM_ADD:
10735         nic_mac = nic_dict.get(constants.INIC_MAC, None)
10736         if nic_mac is None:
10737           nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10738
10739       if constants.INIC_MAC in nic_dict:
10740         nic_mac = nic_dict[constants.INIC_MAC]
10741         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10742           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10743
10744         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10745           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10746                                      " modifying an existing nic",
10747                                      errors.ECODE_INVAL)
10748
10749     if nic_addremove > 1:
10750       raise errors.OpPrereqError("Only one NIC add or remove operation"
10751                                  " supported at a time", errors.ECODE_INVAL)
10752
10753   def ExpandNames(self):
10754     self._ExpandAndLockInstance()
10755     self.needed_locks[locking.LEVEL_NODE] = []
10756     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10757
10758   def DeclareLocks(self, level):
10759     if level == locking.LEVEL_NODE:
10760       self._LockInstancesNodes()
10761       if self.op.disk_template and self.op.remote_node:
10762         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10763         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10764
10765   def BuildHooksEnv(self):
10766     """Build hooks env.
10767
10768     This runs on the master, primary and secondaries.
10769
10770     """
10771     args = dict()
10772     if constants.BE_MEMORY in self.be_new:
10773       args["memory"] = self.be_new[constants.BE_MEMORY]
10774     if constants.BE_VCPUS in self.be_new:
10775       args["vcpus"] = self.be_new[constants.BE_VCPUS]
10776     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10777     # information at all.
10778     if self.op.nics:
10779       args["nics"] = []
10780       nic_override = dict(self.op.nics)
10781       for idx, nic in enumerate(self.instance.nics):
10782         if idx in nic_override:
10783           this_nic_override = nic_override[idx]
10784         else:
10785           this_nic_override = {}
10786         if constants.INIC_IP in this_nic_override:
10787           ip = this_nic_override[constants.INIC_IP]
10788         else:
10789           ip = nic.ip
10790         if constants.INIC_MAC in this_nic_override:
10791           mac = this_nic_override[constants.INIC_MAC]
10792         else:
10793           mac = nic.mac
10794         if idx in self.nic_pnew:
10795           nicparams = self.nic_pnew[idx]
10796         else:
10797           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10798         mode = nicparams[constants.NIC_MODE]
10799         link = nicparams[constants.NIC_LINK]
10800         args["nics"].append((ip, mac, mode, link))
10801       if constants.DDM_ADD in nic_override:
10802         ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10803         mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10804         nicparams = self.nic_pnew[constants.DDM_ADD]
10805         mode = nicparams[constants.NIC_MODE]
10806         link = nicparams[constants.NIC_LINK]
10807         args["nics"].append((ip, mac, mode, link))
10808       elif constants.DDM_REMOVE in nic_override:
10809         del args["nics"][-1]
10810
10811     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10812     if self.op.disk_template:
10813       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10814
10815     return env
10816
10817   def BuildHooksNodes(self):
10818     """Build hooks nodes.
10819
10820     """
10821     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10822     return (nl, nl)
10823
10824   def CheckPrereq(self):
10825     """Check prerequisites.
10826
10827     This only checks the instance list against the existing names.
10828
10829     """
10830     # checking the new params on the primary/secondary nodes
10831
10832     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10833     cluster = self.cluster = self.cfg.GetClusterInfo()
10834     assert self.instance is not None, \
10835       "Cannot retrieve locked instance %s" % self.op.instance_name
10836     pnode = instance.primary_node
10837     nodelist = list(instance.all_nodes)
10838
10839     # OS change
10840     if self.op.os_name and not self.op.force:
10841       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10842                       self.op.force_variant)
10843       instance_os = self.op.os_name
10844     else:
10845       instance_os = instance.os
10846
10847     if self.op.disk_template:
10848       if instance.disk_template == self.op.disk_template:
10849         raise errors.OpPrereqError("Instance already has disk template %s" %
10850                                    instance.disk_template, errors.ECODE_INVAL)
10851
10852       if (instance.disk_template,
10853           self.op.disk_template) not in self._DISK_CONVERSIONS:
10854         raise errors.OpPrereqError("Unsupported disk template conversion from"
10855                                    " %s to %s" % (instance.disk_template,
10856                                                   self.op.disk_template),
10857                                    errors.ECODE_INVAL)
10858       _CheckInstanceDown(self, instance, "cannot change disk template")
10859       if self.op.disk_template in constants.DTS_INT_MIRROR:
10860         if self.op.remote_node == pnode:
10861           raise errors.OpPrereqError("Given new secondary node %s is the same"
10862                                      " as the primary node of the instance" %
10863                                      self.op.remote_node, errors.ECODE_STATE)
10864         _CheckNodeOnline(self, self.op.remote_node)
10865         _CheckNodeNotDrained(self, self.op.remote_node)
10866         # FIXME: here we assume that the old instance type is DT_PLAIN
10867         assert instance.disk_template == constants.DT_PLAIN
10868         disks = [{constants.IDISK_SIZE: d.size,
10869                   constants.IDISK_VG: d.logical_id[0]}
10870                  for d in instance.disks]
10871         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10872         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10873
10874     # hvparams processing
10875     if self.op.hvparams:
10876       hv_type = instance.hypervisor
10877       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10878       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10879       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10880
10881       # local check
10882       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10883       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10884       self.hv_new = hv_new # the new actual values
10885       self.hv_inst = i_hvdict # the new dict (without defaults)
10886     else:
10887       self.hv_new = self.hv_inst = {}
10888
10889     # beparams processing
10890     if self.op.beparams:
10891       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10892                                    use_none=True)
10893       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10894       be_new = cluster.SimpleFillBE(i_bedict)
10895       self.be_new = be_new # the new actual values
10896       self.be_inst = i_bedict # the new dict (without defaults)
10897     else:
10898       self.be_new = self.be_inst = {}
10899     be_old = cluster.FillBE(instance)
10900
10901     # osparams processing
10902     if self.op.osparams:
10903       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10904       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10905       self.os_inst = i_osdict # the new dict (without defaults)
10906     else:
10907       self.os_inst = {}
10908
10909     self.warn = []
10910
10911     if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10912         be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10913       mem_check_list = [pnode]
10914       if be_new[constants.BE_AUTO_BALANCE]:
10915         # either we changed auto_balance to yes or it was from before
10916         mem_check_list.extend(instance.secondary_nodes)
10917       instance_info = self.rpc.call_instance_info(pnode, instance.name,
10918                                                   instance.hypervisor)
10919       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10920                                          instance.hypervisor)
10921       pninfo = nodeinfo[pnode]
10922       msg = pninfo.fail_msg
10923       if msg:
10924         # Assume the primary node is unreachable and go ahead
10925         self.warn.append("Can't get info from primary node %s: %s" %
10926                          (pnode, msg))
10927       elif not isinstance(pninfo.payload.get("memory_free", None), int):
10928         self.warn.append("Node data from primary node %s doesn't contain"
10929                          " free memory information" % pnode)
10930       elif instance_info.fail_msg:
10931         self.warn.append("Can't get instance runtime information: %s" %
10932                         instance_info.fail_msg)
10933       else:
10934         if instance_info.payload:
10935           current_mem = int(instance_info.payload["memory"])
10936         else:
10937           # Assume instance not running
10938           # (there is a slight race condition here, but it's not very probable,
10939           # and we have no other way to check)
10940           current_mem = 0
10941         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10942                     pninfo.payload["memory_free"])
10943         if miss_mem > 0:
10944           raise errors.OpPrereqError("This change will prevent the instance"
10945                                      " from starting, due to %d MB of memory"
10946                                      " missing on its primary node" % miss_mem,
10947                                      errors.ECODE_NORES)
10948
10949       if be_new[constants.BE_AUTO_BALANCE]:
10950         for node, nres in nodeinfo.items():
10951           if node not in instance.secondary_nodes:
10952             continue
10953           nres.Raise("Can't get info from secondary node %s" % node,
10954                      prereq=True, ecode=errors.ECODE_STATE)
10955           if not isinstance(nres.payload.get("memory_free", None), int):
10956             raise errors.OpPrereqError("Secondary node %s didn't return free"
10957                                        " memory information" % node,
10958                                        errors.ECODE_STATE)
10959           elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10960             raise errors.OpPrereqError("This change will prevent the instance"
10961                                        " from failover to its secondary node"
10962                                        " %s, due to not enough memory" % node,
10963                                        errors.ECODE_STATE)
10964
10965     # NIC processing
10966     self.nic_pnew = {}
10967     self.nic_pinst = {}
10968     for nic_op, nic_dict in self.op.nics:
10969       if nic_op == constants.DDM_REMOVE:
10970         if not instance.nics:
10971           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10972                                      errors.ECODE_INVAL)
10973         continue
10974       if nic_op != constants.DDM_ADD:
10975         # an existing nic
10976         if not instance.nics:
10977           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10978                                      " no NICs" % nic_op,
10979                                      errors.ECODE_INVAL)
10980         if nic_op < 0 or nic_op >= len(instance.nics):
10981           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10982                                      " are 0 to %d" %
10983                                      (nic_op, len(instance.nics) - 1),
10984                                      errors.ECODE_INVAL)
10985         old_nic_params = instance.nics[nic_op].nicparams
10986         old_nic_ip = instance.nics[nic_op].ip
10987       else:
10988         old_nic_params = {}
10989         old_nic_ip = None
10990
10991       update_params_dict = dict([(key, nic_dict[key])
10992                                  for key in constants.NICS_PARAMETERS
10993                                  if key in nic_dict])
10994
10995       if "bridge" in nic_dict:
10996         update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10997
10998       new_nic_params = _GetUpdatedParams(old_nic_params,
10999                                          update_params_dict)
11000       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11001       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11002       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11003       self.nic_pinst[nic_op] = new_nic_params
11004       self.nic_pnew[nic_op] = new_filled_nic_params
11005       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11006
11007       if new_nic_mode == constants.NIC_MODE_BRIDGED:
11008         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11009         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11010         if msg:
11011           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11012           if self.op.force:
11013             self.warn.append(msg)
11014           else:
11015             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11016       if new_nic_mode == constants.NIC_MODE_ROUTED:
11017         if constants.INIC_IP in nic_dict:
11018           nic_ip = nic_dict[constants.INIC_IP]
11019         else:
11020           nic_ip = old_nic_ip
11021         if nic_ip is None:
11022           raise errors.OpPrereqError("Cannot set the nic ip to None"
11023                                      " on a routed nic", errors.ECODE_INVAL)
11024       if constants.INIC_MAC in nic_dict:
11025         nic_mac = nic_dict[constants.INIC_MAC]
11026         if nic_mac is None:
11027           raise errors.OpPrereqError("Cannot set the nic mac to None",
11028                                      errors.ECODE_INVAL)
11029         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11030           # otherwise generate the mac
11031           nic_dict[constants.INIC_MAC] = \
11032             self.cfg.GenerateMAC(self.proc.GetECId())
11033         else:
11034           # or validate/reserve the current one
11035           try:
11036             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11037           except errors.ReservationError:
11038             raise errors.OpPrereqError("MAC address %s already in use"
11039                                        " in cluster" % nic_mac,
11040                                        errors.ECODE_NOTUNIQUE)
11041
11042     # DISK processing
11043     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11044       raise errors.OpPrereqError("Disk operations not supported for"
11045                                  " diskless instances",
11046                                  errors.ECODE_INVAL)
11047     for disk_op, _ in self.op.disks:
11048       if disk_op == constants.DDM_REMOVE:
11049         if len(instance.disks) == 1:
11050           raise errors.OpPrereqError("Cannot remove the last disk of"
11051                                      " an instance", errors.ECODE_INVAL)
11052         _CheckInstanceDown(self, instance, "cannot remove disks")
11053
11054       if (disk_op == constants.DDM_ADD and
11055           len(instance.disks) >= constants.MAX_DISKS):
11056         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11057                                    " add more" % constants.MAX_DISKS,
11058                                    errors.ECODE_STATE)
11059       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11060         # an existing disk
11061         if disk_op < 0 or disk_op >= len(instance.disks):
11062           raise errors.OpPrereqError("Invalid disk index %s, valid values"
11063                                      " are 0 to %d" %
11064                                      (disk_op, len(instance.disks)),
11065                                      errors.ECODE_INVAL)
11066
11067     return
11068
11069   def _ConvertPlainToDrbd(self, feedback_fn):
11070     """Converts an instance from plain to drbd.
11071
11072     """
11073     feedback_fn("Converting template to drbd")
11074     instance = self.instance
11075     pnode = instance.primary_node
11076     snode = self.op.remote_node
11077
11078     # create a fake disk info for _GenerateDiskTemplate
11079     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11080                   constants.IDISK_VG: d.logical_id[0]}
11081                  for d in instance.disks]
11082     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11083                                       instance.name, pnode, [snode],
11084                                       disk_info, None, None, 0, feedback_fn)
11085     info = _GetInstanceInfoText(instance)
11086     feedback_fn("Creating aditional volumes...")
11087     # first, create the missing data and meta devices
11088     for disk in new_disks:
11089       # unfortunately this is... not too nice
11090       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11091                             info, True)
11092       for child in disk.children:
11093         _CreateSingleBlockDev(self, snode, instance, child, info, True)
11094     # at this stage, all new LVs have been created, we can rename the
11095     # old ones
11096     feedback_fn("Renaming original volumes...")
11097     rename_list = [(o, n.children[0].logical_id)
11098                    for (o, n) in zip(instance.disks, new_disks)]
11099     result = self.rpc.call_blockdev_rename(pnode, rename_list)
11100     result.Raise("Failed to rename original LVs")
11101
11102     feedback_fn("Initializing DRBD devices...")
11103     # all child devices are in place, we can now create the DRBD devices
11104     for disk in new_disks:
11105       for node in [pnode, snode]:
11106         f_create = node == pnode
11107         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11108
11109     # at this point, the instance has been modified
11110     instance.disk_template = constants.DT_DRBD8
11111     instance.disks = new_disks
11112     self.cfg.Update(instance, feedback_fn)
11113
11114     # disks are created, waiting for sync
11115     disk_abort = not _WaitForSync(self, instance,
11116                                   oneshot=not self.op.wait_for_sync)
11117     if disk_abort:
11118       raise errors.OpExecError("There are some degraded disks for"
11119                                " this instance, please cleanup manually")
11120
11121   def _ConvertDrbdToPlain(self, feedback_fn):
11122     """Converts an instance from drbd to plain.
11123
11124     """
11125     instance = self.instance
11126     assert len(instance.secondary_nodes) == 1
11127     pnode = instance.primary_node
11128     snode = instance.secondary_nodes[0]
11129     feedback_fn("Converting template to plain")
11130
11131     old_disks = instance.disks
11132     new_disks = [d.children[0] for d in old_disks]
11133
11134     # copy over size and mode
11135     for parent, child in zip(old_disks, new_disks):
11136       child.size = parent.size
11137       child.mode = parent.mode
11138
11139     # this is a DRBD disk, return its port to the pool
11140     # NOTE: this must be done right before the call to cfg.Update!
11141     for disk in old_disks:
11142       tcp_port = disk.logical_id[2]
11143       self.cfg.AddTcpUdpPort(tcp_port)
11144
11145     # update instance structure
11146     instance.disks = new_disks
11147     instance.disk_template = constants.DT_PLAIN
11148     self.cfg.Update(instance, feedback_fn)
11149
11150     feedback_fn("Removing volumes on the secondary node...")
11151     for disk in old_disks:
11152       self.cfg.SetDiskID(disk, snode)
11153       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11154       if msg:
11155         self.LogWarning("Could not remove block device %s on node %s,"
11156                         " continuing anyway: %s", disk.iv_name, snode, msg)
11157
11158     feedback_fn("Removing unneeded volumes on the primary node...")
11159     for idx, disk in enumerate(old_disks):
11160       meta = disk.children[1]
11161       self.cfg.SetDiskID(meta, pnode)
11162       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11163       if msg:
11164         self.LogWarning("Could not remove metadata for disk %d on node %s,"
11165                         " continuing anyway: %s", idx, pnode, msg)
11166
11167   def Exec(self, feedback_fn):
11168     """Modifies an instance.
11169
11170     All parameters take effect only at the next restart of the instance.
11171
11172     """
11173     # Process here the warnings from CheckPrereq, as we don't have a
11174     # feedback_fn there.
11175     for warn in self.warn:
11176       feedback_fn("WARNING: %s" % warn)
11177
11178     result = []
11179     instance = self.instance
11180     # disk changes
11181     for disk_op, disk_dict in self.op.disks:
11182       if disk_op == constants.DDM_REMOVE:
11183         # remove the last disk
11184         device = instance.disks.pop()
11185         device_idx = len(instance.disks)
11186         for node, disk in device.ComputeNodeTree(instance.primary_node):
11187           self.cfg.SetDiskID(disk, node)
11188           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11189           if msg:
11190             self.LogWarning("Could not remove disk/%d on node %s: %s,"
11191                             " continuing anyway", device_idx, node, msg)
11192         result.append(("disk/%d" % device_idx, "remove"))
11193
11194         # if this is a DRBD disk, return its port to the pool
11195         if device.dev_type in constants.LDS_DRBD:
11196           tcp_port = device.logical_id[2]
11197           self.cfg.AddTcpUdpPort(tcp_port)
11198       elif disk_op == constants.DDM_ADD:
11199         # add a new disk
11200         if instance.disk_template in (constants.DT_FILE,
11201                                         constants.DT_SHARED_FILE):
11202           file_driver, file_path = instance.disks[0].logical_id
11203           file_path = os.path.dirname(file_path)
11204         else:
11205           file_driver = file_path = None
11206         disk_idx_base = len(instance.disks)
11207         new_disk = _GenerateDiskTemplate(self,
11208                                          instance.disk_template,
11209                                          instance.name, instance.primary_node,
11210                                          instance.secondary_nodes,
11211                                          [disk_dict],
11212                                          file_path,
11213                                          file_driver,
11214                                          disk_idx_base, feedback_fn)[0]
11215         instance.disks.append(new_disk)
11216         info = _GetInstanceInfoText(instance)
11217
11218         logging.info("Creating volume %s for instance %s",
11219                      new_disk.iv_name, instance.name)
11220         # Note: this needs to be kept in sync with _CreateDisks
11221         #HARDCODE
11222         for node in instance.all_nodes:
11223           f_create = node == instance.primary_node
11224           try:
11225             _CreateBlockDev(self, node, instance, new_disk,
11226                             f_create, info, f_create)
11227           except errors.OpExecError, err:
11228             self.LogWarning("Failed to create volume %s (%s) on"
11229                             " node %s: %s",
11230                             new_disk.iv_name, new_disk, node, err)
11231         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11232                        (new_disk.size, new_disk.mode)))
11233       else:
11234         # change a given disk
11235         instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11236         result.append(("disk.mode/%d" % disk_op,
11237                        disk_dict[constants.IDISK_MODE]))
11238
11239     if self.op.disk_template:
11240       r_shut = _ShutdownInstanceDisks(self, instance)
11241       if not r_shut:
11242         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11243                                  " proceed with disk template conversion")
11244       mode = (instance.disk_template, self.op.disk_template)
11245       try:
11246         self._DISK_CONVERSIONS[mode](self, feedback_fn)
11247       except:
11248         self.cfg.ReleaseDRBDMinors(instance.name)
11249         raise
11250       result.append(("disk_template", self.op.disk_template))
11251
11252     # NIC changes
11253     for nic_op, nic_dict in self.op.nics:
11254       if nic_op == constants.DDM_REMOVE:
11255         # remove the last nic
11256         del instance.nics[-1]
11257         result.append(("nic.%d" % len(instance.nics), "remove"))
11258       elif nic_op == constants.DDM_ADD:
11259         # mac and bridge should be set, by now
11260         mac = nic_dict[constants.INIC_MAC]
11261         ip = nic_dict.get(constants.INIC_IP, None)
11262         nicparams = self.nic_pinst[constants.DDM_ADD]
11263         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11264         instance.nics.append(new_nic)
11265         result.append(("nic.%d" % (len(instance.nics) - 1),
11266                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
11267                        (new_nic.mac, new_nic.ip,
11268                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11269                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11270                        )))
11271       else:
11272         for key in (constants.INIC_MAC, constants.INIC_IP):
11273           if key in nic_dict:
11274             setattr(instance.nics[nic_op], key, nic_dict[key])
11275         if nic_op in self.nic_pinst:
11276           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11277         for key, val in nic_dict.iteritems():
11278           result.append(("nic.%s/%d" % (key, nic_op), val))
11279
11280     # hvparams changes
11281     if self.op.hvparams:
11282       instance.hvparams = self.hv_inst
11283       for key, val in self.op.hvparams.iteritems():
11284         result.append(("hv/%s" % key, val))
11285
11286     # beparams changes
11287     if self.op.beparams:
11288       instance.beparams = self.be_inst
11289       for key, val in self.op.beparams.iteritems():
11290         result.append(("be/%s" % key, val))
11291
11292     # OS change
11293     if self.op.os_name:
11294       instance.os = self.op.os_name
11295
11296     # osparams changes
11297     if self.op.osparams:
11298       instance.osparams = self.os_inst
11299       for key, val in self.op.osparams.iteritems():
11300         result.append(("os/%s" % key, val))
11301
11302     self.cfg.Update(instance, feedback_fn)
11303
11304     return result
11305
11306   _DISK_CONVERSIONS = {
11307     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11308     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11309     }
11310
11311
11312 class LUInstanceChangeGroup(LogicalUnit):
11313   HPATH = "instance-change-group"
11314   HTYPE = constants.HTYPE_INSTANCE
11315   REQ_BGL = False
11316
11317   def ExpandNames(self):
11318     self.share_locks = _ShareAll()
11319     self.needed_locks = {
11320       locking.LEVEL_NODEGROUP: [],
11321       locking.LEVEL_NODE: [],
11322       }
11323
11324     self._ExpandAndLockInstance()
11325
11326     if self.op.target_groups:
11327       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11328                                   self.op.target_groups)
11329     else:
11330       self.req_target_uuids = None
11331
11332     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11333
11334   def DeclareLocks(self, level):
11335     if level == locking.LEVEL_NODEGROUP:
11336       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11337
11338       if self.req_target_uuids:
11339         lock_groups = set(self.req_target_uuids)
11340
11341         # Lock all groups used by instance optimistically; this requires going
11342         # via the node before it's locked, requiring verification later on
11343         instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11344         lock_groups.update(instance_groups)
11345       else:
11346         # No target groups, need to lock all of them
11347         lock_groups = locking.ALL_SET
11348
11349       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11350
11351     elif level == locking.LEVEL_NODE:
11352       if self.req_target_uuids:
11353         # Lock all nodes used by instances
11354         self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11355         self._LockInstancesNodes()
11356
11357         # Lock all nodes in all potential target groups
11358         lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11359                        self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11360         member_nodes = [node_name
11361                         for group in lock_groups
11362                         for node_name in self.cfg.GetNodeGroup(group).members]
11363         self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11364       else:
11365         # Lock all nodes as all groups are potential targets
11366         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11367
11368   def CheckPrereq(self):
11369     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11370     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11371     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11372
11373     assert (self.req_target_uuids is None or
11374             owned_groups.issuperset(self.req_target_uuids))
11375     assert owned_instances == set([self.op.instance_name])
11376
11377     # Get instance information
11378     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11379
11380     # Check if node groups for locked instance are still correct
11381     assert owned_nodes.issuperset(self.instance.all_nodes), \
11382       ("Instance %s's nodes changed while we kept the lock" %
11383        self.op.instance_name)
11384
11385     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11386                                            owned_groups)
11387
11388     if self.req_target_uuids:
11389       # User requested specific target groups
11390       self.target_uuids = frozenset(self.req_target_uuids)
11391     else:
11392       # All groups except those used by the instance are potential targets
11393       self.target_uuids = owned_groups - inst_groups
11394
11395     conflicting_groups = self.target_uuids & inst_groups
11396     if conflicting_groups:
11397       raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11398                                  " used by the instance '%s'" %
11399                                  (utils.CommaJoin(conflicting_groups),
11400                                   self.op.instance_name),
11401                                  errors.ECODE_INVAL)
11402
11403     if not self.target_uuids:
11404       raise errors.OpPrereqError("There are no possible target groups",
11405                                  errors.ECODE_INVAL)
11406
11407   def BuildHooksEnv(self):
11408     """Build hooks env.
11409
11410     """
11411     assert self.target_uuids
11412
11413     env = {
11414       "TARGET_GROUPS": " ".join(self.target_uuids),
11415       }
11416
11417     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11418
11419     return env
11420
11421   def BuildHooksNodes(self):
11422     """Build hooks nodes.
11423
11424     """
11425     mn = self.cfg.GetMasterNode()
11426     return ([mn], [mn])
11427
11428   def Exec(self, feedback_fn):
11429     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11430
11431     assert instances == [self.op.instance_name], "Instance not locked"
11432
11433     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11434                      instances=instances, target_groups=list(self.target_uuids))
11435
11436     ial.Run(self.op.iallocator)
11437
11438     if not ial.success:
11439       raise errors.OpPrereqError("Can't compute solution for changing group of"
11440                                  " instance '%s' using iallocator '%s': %s" %
11441                                  (self.op.instance_name, self.op.iallocator,
11442                                   ial.info),
11443                                  errors.ECODE_NORES)
11444
11445     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11446
11447     self.LogInfo("Iallocator returned %s job(s) for changing group of"
11448                  " instance '%s'", len(jobs), self.op.instance_name)
11449
11450     return ResultWithJobs(jobs)
11451
11452
11453 class LUBackupQuery(NoHooksLU):
11454   """Query the exports list
11455
11456   """
11457   REQ_BGL = False
11458
11459   def ExpandNames(self):
11460     self.needed_locks = {}
11461     self.share_locks[locking.LEVEL_NODE] = 1
11462     if not self.op.nodes:
11463       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11464     else:
11465       self.needed_locks[locking.LEVEL_NODE] = \
11466         _GetWantedNodes(self, self.op.nodes)
11467
11468   def Exec(self, feedback_fn):
11469     """Compute the list of all the exported system images.
11470
11471     @rtype: dict
11472     @return: a dictionary with the structure node->(export-list)
11473         where export-list is a list of the instances exported on
11474         that node.
11475
11476     """
11477     self.nodes = self.owned_locks(locking.LEVEL_NODE)
11478     rpcresult = self.rpc.call_export_list(self.nodes)
11479     result = {}
11480     for node in rpcresult:
11481       if rpcresult[node].fail_msg:
11482         result[node] = False
11483       else:
11484         result[node] = rpcresult[node].payload
11485
11486     return result
11487
11488
11489 class LUBackupPrepare(NoHooksLU):
11490   """Prepares an instance for an export and returns useful information.
11491
11492   """
11493   REQ_BGL = False
11494
11495   def ExpandNames(self):
11496     self._ExpandAndLockInstance()
11497
11498   def CheckPrereq(self):
11499     """Check prerequisites.
11500
11501     """
11502     instance_name = self.op.instance_name
11503
11504     self.instance = self.cfg.GetInstanceInfo(instance_name)
11505     assert self.instance is not None, \
11506           "Cannot retrieve locked instance %s" % self.op.instance_name
11507     _CheckNodeOnline(self, self.instance.primary_node)
11508
11509     self._cds = _GetClusterDomainSecret()
11510
11511   def Exec(self, feedback_fn):
11512     """Prepares an instance for an export.
11513
11514     """
11515     instance = self.instance
11516
11517     if self.op.mode == constants.EXPORT_MODE_REMOTE:
11518       salt = utils.GenerateSecret(8)
11519
11520       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11521       result = self.rpc.call_x509_cert_create(instance.primary_node,
11522                                               constants.RIE_CERT_VALIDITY)
11523       result.Raise("Can't create X509 key and certificate on %s" % result.node)
11524
11525       (name, cert_pem) = result.payload
11526
11527       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11528                                              cert_pem)
11529
11530       return {
11531         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11532         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11533                           salt),
11534         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11535         }
11536
11537     return None
11538
11539
11540 class LUBackupExport(LogicalUnit):
11541   """Export an instance to an image in the cluster.
11542
11543   """
11544   HPATH = "instance-export"
11545   HTYPE = constants.HTYPE_INSTANCE
11546   REQ_BGL = False
11547
11548   def CheckArguments(self):
11549     """Check the arguments.
11550
11551     """
11552     self.x509_key_name = self.op.x509_key_name
11553     self.dest_x509_ca_pem = self.op.destination_x509_ca
11554
11555     if self.op.mode == constants.EXPORT_MODE_REMOTE:
11556       if not self.x509_key_name:
11557         raise errors.OpPrereqError("Missing X509 key name for encryption",
11558                                    errors.ECODE_INVAL)
11559
11560       if not self.dest_x509_ca_pem:
11561         raise errors.OpPrereqError("Missing destination X509 CA",
11562                                    errors.ECODE_INVAL)
11563
11564   def ExpandNames(self):
11565     self._ExpandAndLockInstance()
11566
11567     # Lock all nodes for local exports
11568     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11569       # FIXME: lock only instance primary and destination node
11570       #
11571       # Sad but true, for now we have do lock all nodes, as we don't know where
11572       # the previous export might be, and in this LU we search for it and
11573       # remove it from its current node. In the future we could fix this by:
11574       #  - making a tasklet to search (share-lock all), then create the
11575       #    new one, then one to remove, after
11576       #  - removing the removal operation altogether
11577       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11578
11579   def DeclareLocks(self, level):
11580     """Last minute lock declaration."""
11581     # All nodes are locked anyway, so nothing to do here.
11582
11583   def BuildHooksEnv(self):
11584     """Build hooks env.
11585
11586     This will run on the master, primary node and target node.
11587
11588     """
11589     env = {
11590       "EXPORT_MODE": self.op.mode,
11591       "EXPORT_NODE": self.op.target_node,
11592       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11593       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11594       # TODO: Generic function for boolean env variables
11595       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11596       }
11597
11598     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11599
11600     return env
11601
11602   def BuildHooksNodes(self):
11603     """Build hooks nodes.
11604
11605     """
11606     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11607
11608     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11609       nl.append(self.op.target_node)
11610
11611     return (nl, nl)
11612
11613   def CheckPrereq(self):
11614     """Check prerequisites.
11615
11616     This checks that the instance and node names are valid.
11617
11618     """
11619     instance_name = self.op.instance_name
11620
11621     self.instance = self.cfg.GetInstanceInfo(instance_name)
11622     assert self.instance is not None, \
11623           "Cannot retrieve locked instance %s" % self.op.instance_name
11624     _CheckNodeOnline(self, self.instance.primary_node)
11625
11626     if (self.op.remove_instance and self.instance.admin_up and
11627         not self.op.shutdown):
11628       raise errors.OpPrereqError("Can not remove instance without shutting it"
11629                                  " down before")
11630
11631     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11632       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11633       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11634       assert self.dst_node is not None
11635
11636       _CheckNodeOnline(self, self.dst_node.name)
11637       _CheckNodeNotDrained(self, self.dst_node.name)
11638
11639       self._cds = None
11640       self.dest_disk_info = None
11641       self.dest_x509_ca = None
11642
11643     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11644       self.dst_node = None
11645
11646       if len(self.op.target_node) != len(self.instance.disks):
11647         raise errors.OpPrereqError(("Received destination information for %s"
11648                                     " disks, but instance %s has %s disks") %
11649                                    (len(self.op.target_node), instance_name,
11650                                     len(self.instance.disks)),
11651                                    errors.ECODE_INVAL)
11652
11653       cds = _GetClusterDomainSecret()
11654
11655       # Check X509 key name
11656       try:
11657         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11658       except (TypeError, ValueError), err:
11659         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11660
11661       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11662         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11663                                    errors.ECODE_INVAL)
11664
11665       # Load and verify CA
11666       try:
11667         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11668       except OpenSSL.crypto.Error, err:
11669         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11670                                    (err, ), errors.ECODE_INVAL)
11671
11672       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11673       if errcode is not None:
11674         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11675                                    (msg, ), errors.ECODE_INVAL)
11676
11677       self.dest_x509_ca = cert
11678
11679       # Verify target information
11680       disk_info = []
11681       for idx, disk_data in enumerate(self.op.target_node):
11682         try:
11683           (host, port, magic) = \
11684             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11685         except errors.GenericError, err:
11686           raise errors.OpPrereqError("Target info for disk %s: %s" %
11687                                      (idx, err), errors.ECODE_INVAL)
11688
11689         disk_info.append((host, port, magic))
11690
11691       assert len(disk_info) == len(self.op.target_node)
11692       self.dest_disk_info = disk_info
11693
11694     else:
11695       raise errors.ProgrammerError("Unhandled export mode %r" %
11696                                    self.op.mode)
11697
11698     # instance disk type verification
11699     # TODO: Implement export support for file-based disks
11700     for disk in self.instance.disks:
11701       if disk.dev_type == constants.LD_FILE:
11702         raise errors.OpPrereqError("Export not supported for instances with"
11703                                    " file-based disks", errors.ECODE_INVAL)
11704
11705   def _CleanupExports(self, feedback_fn):
11706     """Removes exports of current instance from all other nodes.
11707
11708     If an instance in a cluster with nodes A..D was exported to node C, its
11709     exports will be removed from the nodes A, B and D.
11710
11711     """
11712     assert self.op.mode != constants.EXPORT_MODE_REMOTE
11713
11714     nodelist = self.cfg.GetNodeList()
11715     nodelist.remove(self.dst_node.name)
11716
11717     # on one-node clusters nodelist will be empty after the removal
11718     # if we proceed the backup would be removed because OpBackupQuery
11719     # substitutes an empty list with the full cluster node list.
11720     iname = self.instance.name
11721     if nodelist:
11722       feedback_fn("Removing old exports for instance %s" % iname)
11723       exportlist = self.rpc.call_export_list(nodelist)
11724       for node in exportlist:
11725         if exportlist[node].fail_msg:
11726           continue
11727         if iname in exportlist[node].payload:
11728           msg = self.rpc.call_export_remove(node, iname).fail_msg
11729           if msg:
11730             self.LogWarning("Could not remove older export for instance %s"
11731                             " on node %s: %s", iname, node, msg)
11732
11733   def Exec(self, feedback_fn):
11734     """Export an instance to an image in the cluster.
11735
11736     """
11737     assert self.op.mode in constants.EXPORT_MODES
11738
11739     instance = self.instance
11740     src_node = instance.primary_node
11741
11742     if self.op.shutdown:
11743       # shutdown the instance, but not the disks
11744       feedback_fn("Shutting down instance %s" % instance.name)
11745       result = self.rpc.call_instance_shutdown(src_node, instance,
11746                                                self.op.shutdown_timeout)
11747       # TODO: Maybe ignore failures if ignore_remove_failures is set
11748       result.Raise("Could not shutdown instance %s on"
11749                    " node %s" % (instance.name, src_node))
11750
11751     # set the disks ID correctly since call_instance_start needs the
11752     # correct drbd minor to create the symlinks
11753     for disk in instance.disks:
11754       self.cfg.SetDiskID(disk, src_node)
11755
11756     activate_disks = (not instance.admin_up)
11757
11758     if activate_disks:
11759       # Activate the instance disks if we'exporting a stopped instance
11760       feedback_fn("Activating disks for %s" % instance.name)
11761       _StartInstanceDisks(self, instance, None)
11762
11763     try:
11764       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11765                                                      instance)
11766
11767       helper.CreateSnapshots()
11768       try:
11769         if (self.op.shutdown and instance.admin_up and
11770             not self.op.remove_instance):
11771           assert not activate_disks
11772           feedback_fn("Starting instance %s" % instance.name)
11773           result = self.rpc.call_instance_start(src_node, instance,
11774                                                 None, None, False)
11775           msg = result.fail_msg
11776           if msg:
11777             feedback_fn("Failed to start instance: %s" % msg)
11778             _ShutdownInstanceDisks(self, instance)
11779             raise errors.OpExecError("Could not start instance: %s" % msg)
11780
11781         if self.op.mode == constants.EXPORT_MODE_LOCAL:
11782           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11783         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11784           connect_timeout = constants.RIE_CONNECT_TIMEOUT
11785           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11786
11787           (key_name, _, _) = self.x509_key_name
11788
11789           dest_ca_pem = \
11790             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11791                                             self.dest_x509_ca)
11792
11793           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11794                                                      key_name, dest_ca_pem,
11795                                                      timeouts)
11796       finally:
11797         helper.Cleanup()
11798
11799       # Check for backwards compatibility
11800       assert len(dresults) == len(instance.disks)
11801       assert compat.all(isinstance(i, bool) for i in dresults), \
11802              "Not all results are boolean: %r" % dresults
11803
11804     finally:
11805       if activate_disks:
11806         feedback_fn("Deactivating disks for %s" % instance.name)
11807         _ShutdownInstanceDisks(self, instance)
11808
11809     if not (compat.all(dresults) and fin_resu):
11810       failures = []
11811       if not fin_resu:
11812         failures.append("export finalization")
11813       if not compat.all(dresults):
11814         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11815                                if not dsk)
11816         failures.append("disk export: disk(s) %s" % fdsk)
11817
11818       raise errors.OpExecError("Export failed, errors in %s" %
11819                                utils.CommaJoin(failures))
11820
11821     # At this point, the export was successful, we can cleanup/finish
11822
11823     # Remove instance if requested
11824     if self.op.remove_instance:
11825       feedback_fn("Removing instance %s" % instance.name)
11826       _RemoveInstance(self, feedback_fn, instance,
11827                       self.op.ignore_remove_failures)
11828
11829     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11830       self._CleanupExports(feedback_fn)
11831
11832     return fin_resu, dresults
11833
11834
11835 class LUBackupRemove(NoHooksLU):
11836   """Remove exports related to the named instance.
11837
11838   """
11839   REQ_BGL = False
11840
11841   def ExpandNames(self):
11842     self.needed_locks = {}
11843     # We need all nodes to be locked in order for RemoveExport to work, but we
11844     # don't need to lock the instance itself, as nothing will happen to it (and
11845     # we can remove exports also for a removed instance)
11846     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11847
11848   def Exec(self, feedback_fn):
11849     """Remove any export.
11850
11851     """
11852     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11853     # If the instance was not found we'll try with the name that was passed in.
11854     # This will only work if it was an FQDN, though.
11855     fqdn_warn = False
11856     if not instance_name:
11857       fqdn_warn = True
11858       instance_name = self.op.instance_name
11859
11860     locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11861     exportlist = self.rpc.call_export_list(locked_nodes)
11862     found = False
11863     for node in exportlist:
11864       msg = exportlist[node].fail_msg
11865       if msg:
11866         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11867         continue
11868       if instance_name in exportlist[node].payload:
11869         found = True
11870         result = self.rpc.call_export_remove(node, instance_name)
11871         msg = result.fail_msg
11872         if msg:
11873           logging.error("Could not remove export for instance %s"
11874                         " on node %s: %s", instance_name, node, msg)
11875
11876     if fqdn_warn and not found:
11877       feedback_fn("Export not found. If trying to remove an export belonging"
11878                   " to a deleted instance please use its Fully Qualified"
11879                   " Domain Name.")
11880
11881
11882 class LUGroupAdd(LogicalUnit):
11883   """Logical unit for creating node groups.
11884
11885   """
11886   HPATH = "group-add"
11887   HTYPE = constants.HTYPE_GROUP
11888   REQ_BGL = False
11889
11890   def ExpandNames(self):
11891     # We need the new group's UUID here so that we can create and acquire the
11892     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11893     # that it should not check whether the UUID exists in the configuration.
11894     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11895     self.needed_locks = {}
11896     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11897
11898   def CheckPrereq(self):
11899     """Check prerequisites.
11900
11901     This checks that the given group name is not an existing node group
11902     already.
11903
11904     """
11905     try:
11906       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11907     except errors.OpPrereqError:
11908       pass
11909     else:
11910       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11911                                  " node group (UUID: %s)" %
11912                                  (self.op.group_name, existing_uuid),
11913                                  errors.ECODE_EXISTS)
11914
11915     if self.op.ndparams:
11916       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11917
11918   def BuildHooksEnv(self):
11919     """Build hooks env.
11920
11921     """
11922     return {
11923       "GROUP_NAME": self.op.group_name,
11924       }
11925
11926   def BuildHooksNodes(self):
11927     """Build hooks nodes.
11928
11929     """
11930     mn = self.cfg.GetMasterNode()
11931     return ([mn], [mn])
11932
11933   def Exec(self, feedback_fn):
11934     """Add the node group to the cluster.
11935
11936     """
11937     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11938                                   uuid=self.group_uuid,
11939                                   alloc_policy=self.op.alloc_policy,
11940                                   ndparams=self.op.ndparams)
11941
11942     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11943     del self.remove_locks[locking.LEVEL_NODEGROUP]
11944
11945
11946 class LUGroupAssignNodes(NoHooksLU):
11947   """Logical unit for assigning nodes to groups.
11948
11949   """
11950   REQ_BGL = False
11951
11952   def ExpandNames(self):
11953     # These raise errors.OpPrereqError on their own:
11954     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11955     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11956
11957     # We want to lock all the affected nodes and groups. We have readily
11958     # available the list of nodes, and the *destination* group. To gather the
11959     # list of "source" groups, we need to fetch node information later on.
11960     self.needed_locks = {
11961       locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11962       locking.LEVEL_NODE: self.op.nodes,
11963       }
11964
11965   def DeclareLocks(self, level):
11966     if level == locking.LEVEL_NODEGROUP:
11967       assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11968
11969       # Try to get all affected nodes' groups without having the group or node
11970       # lock yet. Needs verification later in the code flow.
11971       groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11972
11973       self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11974
11975   def CheckPrereq(self):
11976     """Check prerequisites.
11977
11978     """
11979     assert self.needed_locks[locking.LEVEL_NODEGROUP]
11980     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
11981             frozenset(self.op.nodes))
11982
11983     expected_locks = (set([self.group_uuid]) |
11984                       self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11985     actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
11986     if actual_locks != expected_locks:
11987       raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11988                                " current groups are '%s', used to be '%s'" %
11989                                (utils.CommaJoin(expected_locks),
11990                                 utils.CommaJoin(actual_locks)))
11991
11992     self.node_data = self.cfg.GetAllNodesInfo()
11993     self.group = self.cfg.GetNodeGroup(self.group_uuid)
11994     instance_data = self.cfg.GetAllInstancesInfo()
11995
11996     if self.group is None:
11997       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11998                                (self.op.group_name, self.group_uuid))
11999
12000     (new_splits, previous_splits) = \
12001       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12002                                              for node in self.op.nodes],
12003                                             self.node_data, instance_data)
12004
12005     if new_splits:
12006       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12007
12008       if not self.op.force:
12009         raise errors.OpExecError("The following instances get split by this"
12010                                  " change and --force was not given: %s" %
12011                                  fmt_new_splits)
12012       else:
12013         self.LogWarning("This operation will split the following instances: %s",
12014                         fmt_new_splits)
12015
12016         if previous_splits:
12017           self.LogWarning("In addition, these already-split instances continue"
12018                           " to be split across groups: %s",
12019                           utils.CommaJoin(utils.NiceSort(previous_splits)))
12020
12021   def Exec(self, feedback_fn):
12022     """Assign nodes to a new group.
12023
12024     """
12025     mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
12026
12027     self.cfg.AssignGroupNodes(mods)
12028
12029   @staticmethod
12030   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12031     """Check for split instances after a node assignment.
12032
12033     This method considers a series of node assignments as an atomic operation,
12034     and returns information about split instances after applying the set of
12035     changes.
12036
12037     In particular, it returns information about newly split instances, and
12038     instances that were already split, and remain so after the change.
12039
12040     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12041     considered.
12042
12043     @type changes: list of (node_name, new_group_uuid) pairs.
12044     @param changes: list of node assignments to consider.
12045     @param node_data: a dict with data for all nodes
12046     @param instance_data: a dict with all instances to consider
12047     @rtype: a two-tuple
12048     @return: a list of instances that were previously okay and result split as a
12049       consequence of this change, and a list of instances that were previously
12050       split and this change does not fix.
12051
12052     """
12053     changed_nodes = dict((node, group) for node, group in changes
12054                          if node_data[node].group != group)
12055
12056     all_split_instances = set()
12057     previously_split_instances = set()
12058
12059     def InstanceNodes(instance):
12060       return [instance.primary_node] + list(instance.secondary_nodes)
12061
12062     for inst in instance_data.values():
12063       if inst.disk_template not in constants.DTS_INT_MIRROR:
12064         continue
12065
12066       instance_nodes = InstanceNodes(inst)
12067
12068       if len(set(node_data[node].group for node in instance_nodes)) > 1:
12069         previously_split_instances.add(inst.name)
12070
12071       if len(set(changed_nodes.get(node, node_data[node].group)
12072                  for node in instance_nodes)) > 1:
12073         all_split_instances.add(inst.name)
12074
12075     return (list(all_split_instances - previously_split_instances),
12076             list(previously_split_instances & all_split_instances))
12077
12078
12079 class _GroupQuery(_QueryBase):
12080   FIELDS = query.GROUP_FIELDS
12081
12082   def ExpandNames(self, lu):
12083     lu.needed_locks = {}
12084
12085     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12086     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12087
12088     if not self.names:
12089       self.wanted = [name_to_uuid[name]
12090                      for name in utils.NiceSort(name_to_uuid.keys())]
12091     else:
12092       # Accept names to be either names or UUIDs.
12093       missing = []
12094       self.wanted = []
12095       all_uuid = frozenset(self._all_groups.keys())
12096
12097       for name in self.names:
12098         if name in all_uuid:
12099           self.wanted.append(name)
12100         elif name in name_to_uuid:
12101           self.wanted.append(name_to_uuid[name])
12102         else:
12103           missing.append(name)
12104
12105       if missing:
12106         raise errors.OpPrereqError("Some groups do not exist: %s" %
12107                                    utils.CommaJoin(missing),
12108                                    errors.ECODE_NOENT)
12109
12110   def DeclareLocks(self, lu, level):
12111     pass
12112
12113   def _GetQueryData(self, lu):
12114     """Computes the list of node groups and their attributes.
12115
12116     """
12117     do_nodes = query.GQ_NODE in self.requested_data
12118     do_instances = query.GQ_INST in self.requested_data
12119
12120     group_to_nodes = None
12121     group_to_instances = None
12122
12123     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12124     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12125     # latter GetAllInstancesInfo() is not enough, for we have to go through
12126     # instance->node. Hence, we will need to process nodes even if we only need
12127     # instance information.
12128     if do_nodes or do_instances:
12129       all_nodes = lu.cfg.GetAllNodesInfo()
12130       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12131       node_to_group = {}
12132
12133       for node in all_nodes.values():
12134         if node.group in group_to_nodes:
12135           group_to_nodes[node.group].append(node.name)
12136           node_to_group[node.name] = node.group
12137
12138       if do_instances:
12139         all_instances = lu.cfg.GetAllInstancesInfo()
12140         group_to_instances = dict((uuid, []) for uuid in self.wanted)
12141
12142         for instance in all_instances.values():
12143           node = instance.primary_node
12144           if node in node_to_group:
12145             group_to_instances[node_to_group[node]].append(instance.name)
12146
12147         if not do_nodes:
12148           # Do not pass on node information if it was not requested.
12149           group_to_nodes = None
12150
12151     return query.GroupQueryData([self._all_groups[uuid]
12152                                  for uuid in self.wanted],
12153                                 group_to_nodes, group_to_instances)
12154
12155
12156 class LUGroupQuery(NoHooksLU):
12157   """Logical unit for querying node groups.
12158
12159   """
12160   REQ_BGL = False
12161
12162   def CheckArguments(self):
12163     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12164                           self.op.output_fields, False)
12165
12166   def ExpandNames(self):
12167     self.gq.ExpandNames(self)
12168
12169   def DeclareLocks(self, level):
12170     self.gq.DeclareLocks(self, level)
12171
12172   def Exec(self, feedback_fn):
12173     return self.gq.OldStyleQuery(self)
12174
12175
12176 class LUGroupSetParams(LogicalUnit):
12177   """Modifies the parameters of a node group.
12178
12179   """
12180   HPATH = "group-modify"
12181   HTYPE = constants.HTYPE_GROUP
12182   REQ_BGL = False
12183
12184   def CheckArguments(self):
12185     all_changes = [
12186       self.op.ndparams,
12187       self.op.alloc_policy,
12188       ]
12189
12190     if all_changes.count(None) == len(all_changes):
12191       raise errors.OpPrereqError("Please pass at least one modification",
12192                                  errors.ECODE_INVAL)
12193
12194   def ExpandNames(self):
12195     # This raises errors.OpPrereqError on its own:
12196     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12197
12198     self.needed_locks = {
12199       locking.LEVEL_NODEGROUP: [self.group_uuid],
12200       }
12201
12202   def CheckPrereq(self):
12203     """Check prerequisites.
12204
12205     """
12206     self.group = self.cfg.GetNodeGroup(self.group_uuid)
12207
12208     if self.group is None:
12209       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12210                                (self.op.group_name, self.group_uuid))
12211
12212     if self.op.ndparams:
12213       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12214       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12215       self.new_ndparams = new_ndparams
12216
12217   def BuildHooksEnv(self):
12218     """Build hooks env.
12219
12220     """
12221     return {
12222       "GROUP_NAME": self.op.group_name,
12223       "NEW_ALLOC_POLICY": self.op.alloc_policy,
12224       }
12225
12226   def BuildHooksNodes(self):
12227     """Build hooks nodes.
12228
12229     """
12230     mn = self.cfg.GetMasterNode()
12231     return ([mn], [mn])
12232
12233   def Exec(self, feedback_fn):
12234     """Modifies the node group.
12235
12236     """
12237     result = []
12238
12239     if self.op.ndparams:
12240       self.group.ndparams = self.new_ndparams
12241       result.append(("ndparams", str(self.group.ndparams)))
12242
12243     if self.op.alloc_policy:
12244       self.group.alloc_policy = self.op.alloc_policy
12245
12246     self.cfg.Update(self.group, feedback_fn)
12247     return result
12248
12249
12250 class LUGroupRemove(LogicalUnit):
12251   HPATH = "group-remove"
12252   HTYPE = constants.HTYPE_GROUP
12253   REQ_BGL = False
12254
12255   def ExpandNames(self):
12256     # This will raises errors.OpPrereqError on its own:
12257     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12258     self.needed_locks = {
12259       locking.LEVEL_NODEGROUP: [self.group_uuid],
12260       }
12261
12262   def CheckPrereq(self):
12263     """Check prerequisites.
12264
12265     This checks that the given group name exists as a node group, that is
12266     empty (i.e., contains no nodes), and that is not the last group of the
12267     cluster.
12268
12269     """
12270     # Verify that the group is empty.
12271     group_nodes = [node.name
12272                    for node in self.cfg.GetAllNodesInfo().values()
12273                    if node.group == self.group_uuid]
12274
12275     if group_nodes:
12276       raise errors.OpPrereqError("Group '%s' not empty, has the following"
12277                                  " nodes: %s" %
12278                                  (self.op.group_name,
12279                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
12280                                  errors.ECODE_STATE)
12281
12282     # Verify the cluster would not be left group-less.
12283     if len(self.cfg.GetNodeGroupList()) == 1:
12284       raise errors.OpPrereqError("Group '%s' is the only group,"
12285                                  " cannot be removed" %
12286                                  self.op.group_name,
12287                                  errors.ECODE_STATE)
12288
12289   def BuildHooksEnv(self):
12290     """Build hooks env.
12291
12292     """
12293     return {
12294       "GROUP_NAME": self.op.group_name,
12295       }
12296
12297   def BuildHooksNodes(self):
12298     """Build hooks nodes.
12299
12300     """
12301     mn = self.cfg.GetMasterNode()
12302     return ([mn], [mn])
12303
12304   def Exec(self, feedback_fn):
12305     """Remove the node group.
12306
12307     """
12308     try:
12309       self.cfg.RemoveNodeGroup(self.group_uuid)
12310     except errors.ConfigurationError:
12311       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12312                                (self.op.group_name, self.group_uuid))
12313
12314     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12315
12316
12317 class LUGroupRename(LogicalUnit):
12318   HPATH = "group-rename"
12319   HTYPE = constants.HTYPE_GROUP
12320   REQ_BGL = False
12321
12322   def ExpandNames(self):
12323     # This raises errors.OpPrereqError on its own:
12324     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12325
12326     self.needed_locks = {
12327       locking.LEVEL_NODEGROUP: [self.group_uuid],
12328       }
12329
12330   def CheckPrereq(self):
12331     """Check prerequisites.
12332
12333     Ensures requested new name is not yet used.
12334
12335     """
12336     try:
12337       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12338     except errors.OpPrereqError:
12339       pass
12340     else:
12341       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12342                                  " node group (UUID: %s)" %
12343                                  (self.op.new_name, new_name_uuid),
12344                                  errors.ECODE_EXISTS)
12345
12346   def BuildHooksEnv(self):
12347     """Build hooks env.
12348
12349     """
12350     return {
12351       "OLD_NAME": self.op.group_name,
12352       "NEW_NAME": self.op.new_name,
12353       }
12354
12355   def BuildHooksNodes(self):
12356     """Build hooks nodes.
12357
12358     """
12359     mn = self.cfg.GetMasterNode()
12360
12361     all_nodes = self.cfg.GetAllNodesInfo()
12362     all_nodes.pop(mn, None)
12363
12364     run_nodes = [mn]
12365     run_nodes.extend(node.name for node in all_nodes.values()
12366                      if node.group == self.group_uuid)
12367
12368     return (run_nodes, run_nodes)
12369
12370   def Exec(self, feedback_fn):
12371     """Rename the node group.
12372
12373     """
12374     group = self.cfg.GetNodeGroup(self.group_uuid)
12375
12376     if group is None:
12377       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12378                                (self.op.group_name, self.group_uuid))
12379
12380     group.name = self.op.new_name
12381     self.cfg.Update(group, feedback_fn)
12382
12383     return self.op.new_name
12384
12385
12386 class LUGroupEvacuate(LogicalUnit):
12387   HPATH = "group-evacuate"
12388   HTYPE = constants.HTYPE_GROUP
12389   REQ_BGL = False
12390
12391   def ExpandNames(self):
12392     # This raises errors.OpPrereqError on its own:
12393     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12394
12395     if self.op.target_groups:
12396       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12397                                   self.op.target_groups)
12398     else:
12399       self.req_target_uuids = []
12400
12401     if self.group_uuid in self.req_target_uuids:
12402       raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12403                                  " as a target group (targets are %s)" %
12404                                  (self.group_uuid,
12405                                   utils.CommaJoin(self.req_target_uuids)),
12406                                  errors.ECODE_INVAL)
12407
12408     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12409
12410     self.share_locks = _ShareAll()
12411     self.needed_locks = {
12412       locking.LEVEL_INSTANCE: [],
12413       locking.LEVEL_NODEGROUP: [],
12414       locking.LEVEL_NODE: [],
12415       }
12416
12417   def DeclareLocks(self, level):
12418     if level == locking.LEVEL_INSTANCE:
12419       assert not self.needed_locks[locking.LEVEL_INSTANCE]
12420
12421       # Lock instances optimistically, needs verification once node and group
12422       # locks have been acquired
12423       self.needed_locks[locking.LEVEL_INSTANCE] = \
12424         self.cfg.GetNodeGroupInstances(self.group_uuid)
12425
12426     elif level == locking.LEVEL_NODEGROUP:
12427       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12428
12429       if self.req_target_uuids:
12430         lock_groups = set([self.group_uuid] + self.req_target_uuids)
12431
12432         # Lock all groups used by instances optimistically; this requires going
12433         # via the node before it's locked, requiring verification later on
12434         lock_groups.update(group_uuid
12435                            for instance_name in
12436                              self.owned_locks(locking.LEVEL_INSTANCE)
12437                            for group_uuid in
12438                              self.cfg.GetInstanceNodeGroups(instance_name))
12439       else:
12440         # No target groups, need to lock all of them
12441         lock_groups = locking.ALL_SET
12442
12443       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12444
12445     elif level == locking.LEVEL_NODE:
12446       # This will only lock the nodes in the group to be evacuated which
12447       # contain actual instances
12448       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12449       self._LockInstancesNodes()
12450
12451       # Lock all nodes in group to be evacuated and target groups
12452       owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12453       assert self.group_uuid in owned_groups
12454       member_nodes = [node_name
12455                       for group in owned_groups
12456                       for node_name in self.cfg.GetNodeGroup(group).members]
12457       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12458
12459   def CheckPrereq(self):
12460     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12461     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12462     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12463
12464     assert owned_groups.issuperset(self.req_target_uuids)
12465     assert self.group_uuid in owned_groups
12466
12467     # Check if locked instances are still correct
12468     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12469
12470     # Get instance information
12471     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12472
12473     # Check if node groups for locked instances are still correct
12474     for instance_name in owned_instances:
12475       inst = self.instances[instance_name]
12476       assert owned_nodes.issuperset(inst.all_nodes), \
12477         "Instance %s's nodes changed while we kept the lock" % instance_name
12478
12479       inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12480                                              owned_groups)
12481
12482       assert self.group_uuid in inst_groups, \
12483         "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12484
12485     if self.req_target_uuids:
12486       # User requested specific target groups
12487       self.target_uuids = self.req_target_uuids
12488     else:
12489       # All groups except the one to be evacuated are potential targets
12490       self.target_uuids = [group_uuid for group_uuid in owned_groups
12491                            if group_uuid != self.group_uuid]
12492
12493       if not self.target_uuids:
12494         raise errors.OpPrereqError("There are no possible target groups",
12495                                    errors.ECODE_INVAL)
12496
12497   def BuildHooksEnv(self):
12498     """Build hooks env.
12499
12500     """
12501     return {
12502       "GROUP_NAME": self.op.group_name,
12503       "TARGET_GROUPS": " ".join(self.target_uuids),
12504       }
12505
12506   def BuildHooksNodes(self):
12507     """Build hooks nodes.
12508
12509     """
12510     mn = self.cfg.GetMasterNode()
12511
12512     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12513
12514     run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12515
12516     return (run_nodes, run_nodes)
12517
12518   def Exec(self, feedback_fn):
12519     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12520
12521     assert self.group_uuid not in self.target_uuids
12522
12523     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12524                      instances=instances, target_groups=self.target_uuids)
12525
12526     ial.Run(self.op.iallocator)
12527
12528     if not ial.success:
12529       raise errors.OpPrereqError("Can't compute group evacuation using"
12530                                  " iallocator '%s': %s" %
12531                                  (self.op.iallocator, ial.info),
12532                                  errors.ECODE_NORES)
12533
12534     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12535
12536     self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12537                  len(jobs), self.op.group_name)
12538
12539     return ResultWithJobs(jobs)
12540
12541
12542 class TagsLU(NoHooksLU): # pylint: disable=W0223
12543   """Generic tags LU.
12544
12545   This is an abstract class which is the parent of all the other tags LUs.
12546
12547   """
12548   def ExpandNames(self):
12549     self.group_uuid = None
12550     self.needed_locks = {}
12551     if self.op.kind == constants.TAG_NODE:
12552       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12553       self.needed_locks[locking.LEVEL_NODE] = self.op.name
12554     elif self.op.kind == constants.TAG_INSTANCE:
12555       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12556       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12557     elif self.op.kind == constants.TAG_NODEGROUP:
12558       self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12559
12560     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12561     # not possible to acquire the BGL based on opcode parameters)
12562
12563   def CheckPrereq(self):
12564     """Check prerequisites.
12565
12566     """
12567     if self.op.kind == constants.TAG_CLUSTER:
12568       self.target = self.cfg.GetClusterInfo()
12569     elif self.op.kind == constants.TAG_NODE:
12570       self.target = self.cfg.GetNodeInfo(self.op.name)
12571     elif self.op.kind == constants.TAG_INSTANCE:
12572       self.target = self.cfg.GetInstanceInfo(self.op.name)
12573     elif self.op.kind == constants.TAG_NODEGROUP:
12574       self.target = self.cfg.GetNodeGroup(self.group_uuid)
12575     else:
12576       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12577                                  str(self.op.kind), errors.ECODE_INVAL)
12578
12579
12580 class LUTagsGet(TagsLU):
12581   """Returns the tags of a given object.
12582
12583   """
12584   REQ_BGL = False
12585
12586   def ExpandNames(self):
12587     TagsLU.ExpandNames(self)
12588
12589     # Share locks as this is only a read operation
12590     self.share_locks = _ShareAll()
12591
12592   def Exec(self, feedback_fn):
12593     """Returns the tag list.
12594
12595     """
12596     return list(self.target.GetTags())
12597
12598
12599 class LUTagsSearch(NoHooksLU):
12600   """Searches the tags for a given pattern.
12601
12602   """
12603   REQ_BGL = False
12604
12605   def ExpandNames(self):
12606     self.needed_locks = {}
12607
12608   def CheckPrereq(self):
12609     """Check prerequisites.
12610
12611     This checks the pattern passed for validity by compiling it.
12612
12613     """
12614     try:
12615       self.re = re.compile(self.op.pattern)
12616     except re.error, err:
12617       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12618                                  (self.op.pattern, err), errors.ECODE_INVAL)
12619
12620   def Exec(self, feedback_fn):
12621     """Returns the tag list.
12622
12623     """
12624     cfg = self.cfg
12625     tgts = [("/cluster", cfg.GetClusterInfo())]
12626     ilist = cfg.GetAllInstancesInfo().values()
12627     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12628     nlist = cfg.GetAllNodesInfo().values()
12629     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12630     tgts.extend(("/nodegroup/%s" % n.name, n)
12631                 for n in cfg.GetAllNodeGroupsInfo().values())
12632     results = []
12633     for path, target in tgts:
12634       for tag in target.GetTags():
12635         if self.re.search(tag):
12636           results.append((path, tag))
12637     return results
12638
12639
12640 class LUTagsSet(TagsLU):
12641   """Sets a tag on a given object.
12642
12643   """
12644   REQ_BGL = False
12645
12646   def CheckPrereq(self):
12647     """Check prerequisites.
12648
12649     This checks the type and length of the tag name and value.
12650
12651     """
12652     TagsLU.CheckPrereq(self)
12653     for tag in self.op.tags:
12654       objects.TaggableObject.ValidateTag(tag)
12655
12656   def Exec(self, feedback_fn):
12657     """Sets the tag.
12658
12659     """
12660     try:
12661       for tag in self.op.tags:
12662         self.target.AddTag(tag)
12663     except errors.TagError, err:
12664       raise errors.OpExecError("Error while setting tag: %s" % str(err))
12665     self.cfg.Update(self.target, feedback_fn)
12666
12667
12668 class LUTagsDel(TagsLU):
12669   """Delete a list of tags from a given object.
12670
12671   """
12672   REQ_BGL = False
12673
12674   def CheckPrereq(self):
12675     """Check prerequisites.
12676
12677     This checks that we have the given tag.
12678
12679     """
12680     TagsLU.CheckPrereq(self)
12681     for tag in self.op.tags:
12682       objects.TaggableObject.ValidateTag(tag)
12683     del_tags = frozenset(self.op.tags)
12684     cur_tags = self.target.GetTags()
12685
12686     diff_tags = del_tags - cur_tags
12687     if diff_tags:
12688       diff_names = ("'%s'" % i for i in sorted(diff_tags))
12689       raise errors.OpPrereqError("Tag(s) %s not found" %
12690                                  (utils.CommaJoin(diff_names), ),
12691                                  errors.ECODE_NOENT)
12692
12693   def Exec(self, feedback_fn):
12694     """Remove the tag from the object.
12695
12696     """
12697     for tag in self.op.tags:
12698       self.target.RemoveTag(tag)
12699     self.cfg.Update(self.target, feedback_fn)
12700
12701
12702 class LUTestDelay(NoHooksLU):
12703   """Sleep for a specified amount of time.
12704
12705   This LU sleeps on the master and/or nodes for a specified amount of
12706   time.
12707
12708   """
12709   REQ_BGL = False
12710
12711   def ExpandNames(self):
12712     """Expand names and set required locks.
12713
12714     This expands the node list, if any.
12715
12716     """
12717     self.needed_locks = {}
12718     if self.op.on_nodes:
12719       # _GetWantedNodes can be used here, but is not always appropriate to use
12720       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12721       # more information.
12722       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12723       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12724
12725   def _TestDelay(self):
12726     """Do the actual sleep.
12727
12728     """
12729     if self.op.on_master:
12730       if not utils.TestDelay(self.op.duration):
12731         raise errors.OpExecError("Error during master delay test")
12732     if self.op.on_nodes:
12733       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12734       for node, node_result in result.items():
12735         node_result.Raise("Failure during rpc call to node %s" % node)
12736
12737   def Exec(self, feedback_fn):
12738     """Execute the test delay opcode, with the wanted repetitions.
12739
12740     """
12741     if self.op.repeat == 0:
12742       self._TestDelay()
12743     else:
12744       top_value = self.op.repeat - 1
12745       for i in range(self.op.repeat):
12746         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12747         self._TestDelay()
12748
12749
12750 class LUTestJqueue(NoHooksLU):
12751   """Utility LU to test some aspects of the job queue.
12752
12753   """
12754   REQ_BGL = False
12755
12756   # Must be lower than default timeout for WaitForJobChange to see whether it
12757   # notices changed jobs
12758   _CLIENT_CONNECT_TIMEOUT = 20.0
12759   _CLIENT_CONFIRM_TIMEOUT = 60.0
12760
12761   @classmethod
12762   def _NotifyUsingSocket(cls, cb, errcls):
12763     """Opens a Unix socket and waits for another program to connect.
12764
12765     @type cb: callable
12766     @param cb: Callback to send socket name to client
12767     @type errcls: class
12768     @param errcls: Exception class to use for errors
12769
12770     """
12771     # Using a temporary directory as there's no easy way to create temporary
12772     # sockets without writing a custom loop around tempfile.mktemp and
12773     # socket.bind
12774     tmpdir = tempfile.mkdtemp()
12775     try:
12776       tmpsock = utils.PathJoin(tmpdir, "sock")
12777
12778       logging.debug("Creating temporary socket at %s", tmpsock)
12779       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12780       try:
12781         sock.bind(tmpsock)
12782         sock.listen(1)
12783
12784         # Send details to client
12785         cb(tmpsock)
12786
12787         # Wait for client to connect before continuing
12788         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12789         try:
12790           (conn, _) = sock.accept()
12791         except socket.error, err:
12792           raise errcls("Client didn't connect in time (%s)" % err)
12793       finally:
12794         sock.close()
12795     finally:
12796       # Remove as soon as client is connected
12797       shutil.rmtree(tmpdir)
12798
12799     # Wait for client to close
12800     try:
12801       try:
12802         # pylint: disable=E1101
12803         # Instance of '_socketobject' has no ... member
12804         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12805         conn.recv(1)
12806       except socket.error, err:
12807         raise errcls("Client failed to confirm notification (%s)" % err)
12808     finally:
12809       conn.close()
12810
12811   def _SendNotification(self, test, arg, sockname):
12812     """Sends a notification to the client.
12813
12814     @type test: string
12815     @param test: Test name
12816     @param arg: Test argument (depends on test)
12817     @type sockname: string
12818     @param sockname: Socket path
12819
12820     """
12821     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12822
12823   def _Notify(self, prereq, test, arg):
12824     """Notifies the client of a test.
12825
12826     @type prereq: bool
12827     @param prereq: Whether this is a prereq-phase test
12828     @type test: string
12829     @param test: Test name
12830     @param arg: Test argument (depends on test)
12831
12832     """
12833     if prereq:
12834       errcls = errors.OpPrereqError
12835     else:
12836       errcls = errors.OpExecError
12837
12838     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12839                                                   test, arg),
12840                                    errcls)
12841
12842   def CheckArguments(self):
12843     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12844     self.expandnames_calls = 0
12845
12846   def ExpandNames(self):
12847     checkargs_calls = getattr(self, "checkargs_calls", 0)
12848     if checkargs_calls < 1:
12849       raise errors.ProgrammerError("CheckArguments was not called")
12850
12851     self.expandnames_calls += 1
12852
12853     if self.op.notify_waitlock:
12854       self._Notify(True, constants.JQT_EXPANDNAMES, None)
12855
12856     self.LogInfo("Expanding names")
12857
12858     # Get lock on master node (just to get a lock, not for a particular reason)
12859     self.needed_locks = {
12860       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12861       }
12862
12863   def Exec(self, feedback_fn):
12864     if self.expandnames_calls < 1:
12865       raise errors.ProgrammerError("ExpandNames was not called")
12866
12867     if self.op.notify_exec:
12868       self._Notify(False, constants.JQT_EXEC, None)
12869
12870     self.LogInfo("Executing")
12871
12872     if self.op.log_messages:
12873       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12874       for idx, msg in enumerate(self.op.log_messages):
12875         self.LogInfo("Sending log message %s", idx + 1)
12876         feedback_fn(constants.JQT_MSGPREFIX + msg)
12877         # Report how many test messages have been sent
12878         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12879
12880     if self.op.fail:
12881       raise errors.OpExecError("Opcode failure was requested")
12882
12883     return True
12884
12885
12886 class IAllocator(object):
12887   """IAllocator framework.
12888
12889   An IAllocator instance has three sets of attributes:
12890     - cfg that is needed to query the cluster
12891     - input data (all members of the _KEYS class attribute are required)
12892     - four buffer attributes (in|out_data|text), that represent the
12893       input (to the external script) in text and data structure format,
12894       and the output from it, again in two formats
12895     - the result variables from the script (success, info, nodes) for
12896       easy usage
12897
12898   """
12899   # pylint: disable=R0902
12900   # lots of instance attributes
12901
12902   def __init__(self, cfg, rpc, mode, **kwargs):
12903     self.cfg = cfg
12904     self.rpc = rpc
12905     # init buffer variables
12906     self.in_text = self.out_text = self.in_data = self.out_data = None
12907     # init all input fields so that pylint is happy
12908     self.mode = mode
12909     self.memory = self.disks = self.disk_template = None
12910     self.os = self.tags = self.nics = self.vcpus = None
12911     self.hypervisor = None
12912     self.relocate_from = None
12913     self.name = None
12914     self.instances = None
12915     self.evac_mode = None
12916     self.target_groups = []
12917     # computed fields
12918     self.required_nodes = None
12919     # init result fields
12920     self.success = self.info = self.result = None
12921
12922     try:
12923       (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12924     except KeyError:
12925       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12926                                    " IAllocator" % self.mode)
12927
12928     keyset = [n for (n, _) in keydata]
12929
12930     for key in kwargs:
12931       if key not in keyset:
12932         raise errors.ProgrammerError("Invalid input parameter '%s' to"
12933                                      " IAllocator" % key)
12934       setattr(self, key, kwargs[key])
12935
12936     for key in keyset:
12937       if key not in kwargs:
12938         raise errors.ProgrammerError("Missing input parameter '%s' to"
12939                                      " IAllocator" % key)
12940     self._BuildInputData(compat.partial(fn, self), keydata)
12941
12942   def _ComputeClusterData(self):
12943     """Compute the generic allocator input data.
12944
12945     This is the data that is independent of the actual operation.
12946
12947     """
12948     cfg = self.cfg
12949     cluster_info = cfg.GetClusterInfo()
12950     # cluster data
12951     data = {
12952       "version": constants.IALLOCATOR_VERSION,
12953       "cluster_name": cfg.GetClusterName(),
12954       "cluster_tags": list(cluster_info.GetTags()),
12955       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12956       # we don't have job IDs
12957       }
12958     ninfo = cfg.GetAllNodesInfo()
12959     iinfo = cfg.GetAllInstancesInfo().values()
12960     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12961
12962     # node data
12963     node_list = [n.name for n in ninfo.values() if n.vm_capable]
12964
12965     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12966       hypervisor_name = self.hypervisor
12967     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12968       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12969     else:
12970       hypervisor_name = cluster_info.enabled_hypervisors[0]
12971
12972     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12973                                         hypervisor_name)
12974     node_iinfo = \
12975       self.rpc.call_all_instances_info(node_list,
12976                                        cluster_info.enabled_hypervisors)
12977
12978     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12979
12980     config_ndata = self._ComputeBasicNodeData(ninfo)
12981     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12982                                                  i_list, config_ndata)
12983     assert len(data["nodes"]) == len(ninfo), \
12984         "Incomplete node data computed"
12985
12986     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12987
12988     self.in_data = data
12989
12990   @staticmethod
12991   def _ComputeNodeGroupData(cfg):
12992     """Compute node groups data.
12993
12994     """
12995     ng = dict((guuid, {
12996       "name": gdata.name,
12997       "alloc_policy": gdata.alloc_policy,
12998       })
12999       for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13000
13001     return ng
13002
13003   @staticmethod
13004   def _ComputeBasicNodeData(node_cfg):
13005     """Compute global node data.
13006
13007     @rtype: dict
13008     @returns: a dict of name: (node dict, node config)
13009
13010     """
13011     # fill in static (config-based) values
13012     node_results = dict((ninfo.name, {
13013       "tags": list(ninfo.GetTags()),
13014       "primary_ip": ninfo.primary_ip,
13015       "secondary_ip": ninfo.secondary_ip,
13016       "offline": ninfo.offline,
13017       "drained": ninfo.drained,
13018       "master_candidate": ninfo.master_candidate,
13019       "group": ninfo.group,
13020       "master_capable": ninfo.master_capable,
13021       "vm_capable": ninfo.vm_capable,
13022       })
13023       for ninfo in node_cfg.values())
13024
13025     return node_results
13026
13027   @staticmethod
13028   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13029                               node_results):
13030     """Compute global node data.
13031
13032     @param node_results: the basic node structures as filled from the config
13033
13034     """
13035     # make a copy of the current dict
13036     node_results = dict(node_results)
13037     for nname, nresult in node_data.items():
13038       assert nname in node_results, "Missing basic data for node %s" % nname
13039       ninfo = node_cfg[nname]
13040
13041       if not (ninfo.offline or ninfo.drained):
13042         nresult.Raise("Can't get data for node %s" % nname)
13043         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13044                                 nname)
13045         remote_info = nresult.payload
13046
13047         for attr in ["memory_total", "memory_free", "memory_dom0",
13048                      "vg_size", "vg_free", "cpu_total"]:
13049           if attr not in remote_info:
13050             raise errors.OpExecError("Node '%s' didn't return attribute"
13051                                      " '%s'" % (nname, attr))
13052           if not isinstance(remote_info[attr], int):
13053             raise errors.OpExecError("Node '%s' returned invalid value"
13054                                      " for '%s': %s" %
13055                                      (nname, attr, remote_info[attr]))
13056         # compute memory used by primary instances
13057         i_p_mem = i_p_up_mem = 0
13058         for iinfo, beinfo in i_list:
13059           if iinfo.primary_node == nname:
13060             i_p_mem += beinfo[constants.BE_MEMORY]
13061             if iinfo.name not in node_iinfo[nname].payload:
13062               i_used_mem = 0
13063             else:
13064               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13065             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13066             remote_info["memory_free"] -= max(0, i_mem_diff)
13067
13068             if iinfo.admin_up:
13069               i_p_up_mem += beinfo[constants.BE_MEMORY]
13070
13071         # compute memory used by instances
13072         pnr_dyn = {
13073           "total_memory": remote_info["memory_total"],
13074           "reserved_memory": remote_info["memory_dom0"],
13075           "free_memory": remote_info["memory_free"],
13076           "total_disk": remote_info["vg_size"],
13077           "free_disk": remote_info["vg_free"],
13078           "total_cpus": remote_info["cpu_total"],
13079           "i_pri_memory": i_p_mem,
13080           "i_pri_up_memory": i_p_up_mem,
13081           }
13082         pnr_dyn.update(node_results[nname])
13083         node_results[nname] = pnr_dyn
13084
13085     return node_results
13086
13087   @staticmethod
13088   def _ComputeInstanceData(cluster_info, i_list):
13089     """Compute global instance data.
13090
13091     """
13092     instance_data = {}
13093     for iinfo, beinfo in i_list:
13094       nic_data = []
13095       for nic in iinfo.nics:
13096         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13097         nic_dict = {
13098           "mac": nic.mac,
13099           "ip": nic.ip,
13100           "mode": filled_params[constants.NIC_MODE],
13101           "link": filled_params[constants.NIC_LINK],
13102           }
13103         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13104           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13105         nic_data.append(nic_dict)
13106       pir = {
13107         "tags": list(iinfo.GetTags()),
13108         "admin_up": iinfo.admin_up,
13109         "vcpus": beinfo[constants.BE_VCPUS],
13110         "memory": beinfo[constants.BE_MEMORY],
13111         "os": iinfo.os,
13112         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13113         "nics": nic_data,
13114         "disks": [{constants.IDISK_SIZE: dsk.size,
13115                    constants.IDISK_MODE: dsk.mode}
13116                   for dsk in iinfo.disks],
13117         "disk_template": iinfo.disk_template,
13118         "hypervisor": iinfo.hypervisor,
13119         }
13120       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13121                                                  pir["disks"])
13122       instance_data[iinfo.name] = pir
13123
13124     return instance_data
13125
13126   def _AddNewInstance(self):
13127     """Add new instance data to allocator structure.
13128
13129     This in combination with _AllocatorGetClusterData will create the
13130     correct structure needed as input for the allocator.
13131
13132     The checks for the completeness of the opcode must have already been
13133     done.
13134
13135     """
13136     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13137
13138     if self.disk_template in constants.DTS_INT_MIRROR:
13139       self.required_nodes = 2
13140     else:
13141       self.required_nodes = 1
13142
13143     request = {
13144       "name": self.name,
13145       "disk_template": self.disk_template,
13146       "tags": self.tags,
13147       "os": self.os,
13148       "vcpus": self.vcpus,
13149       "memory": self.memory,
13150       "disks": self.disks,
13151       "disk_space_total": disk_space,
13152       "nics": self.nics,
13153       "required_nodes": self.required_nodes,
13154       "hypervisor": self.hypervisor,
13155       }
13156
13157     return request
13158
13159   def _AddRelocateInstance(self):
13160     """Add relocate instance data to allocator structure.
13161
13162     This in combination with _IAllocatorGetClusterData will create the
13163     correct structure needed as input for the allocator.
13164
13165     The checks for the completeness of the opcode must have already been
13166     done.
13167
13168     """
13169     instance = self.cfg.GetInstanceInfo(self.name)
13170     if instance is None:
13171       raise errors.ProgrammerError("Unknown instance '%s' passed to"
13172                                    " IAllocator" % self.name)
13173
13174     if instance.disk_template not in constants.DTS_MIRRORED:
13175       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13176                                  errors.ECODE_INVAL)
13177
13178     if instance.disk_template in constants.DTS_INT_MIRROR and \
13179         len(instance.secondary_nodes) != 1:
13180       raise errors.OpPrereqError("Instance has not exactly one secondary node",
13181                                  errors.ECODE_STATE)
13182
13183     self.required_nodes = 1
13184     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13185     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13186
13187     request = {
13188       "name": self.name,
13189       "disk_space_total": disk_space,
13190       "required_nodes": self.required_nodes,
13191       "relocate_from": self.relocate_from,
13192       }
13193     return request
13194
13195   def _AddNodeEvacuate(self):
13196     """Get data for node-evacuate requests.
13197
13198     """
13199     return {
13200       "instances": self.instances,
13201       "evac_mode": self.evac_mode,
13202       }
13203
13204   def _AddChangeGroup(self):
13205     """Get data for node-evacuate requests.
13206
13207     """
13208     return {
13209       "instances": self.instances,
13210       "target_groups": self.target_groups,
13211       }
13212
13213   def _BuildInputData(self, fn, keydata):
13214     """Build input data structures.
13215
13216     """
13217     self._ComputeClusterData()
13218
13219     request = fn()
13220     request["type"] = self.mode
13221     for keyname, keytype in keydata:
13222       if keyname not in request:
13223         raise errors.ProgrammerError("Request parameter %s is missing" %
13224                                      keyname)
13225       val = request[keyname]
13226       if not keytype(val):
13227         raise errors.ProgrammerError("Request parameter %s doesn't pass"
13228                                      " validation, value %s, expected"
13229                                      " type %s" % (keyname, val, keytype))
13230     self.in_data["request"] = request
13231
13232     self.in_text = serializer.Dump(self.in_data)
13233
13234   _STRING_LIST = ht.TListOf(ht.TString)
13235   _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13236      # pylint: disable=E1101
13237      # Class '...' has no 'OP_ID' member
13238      "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13239                           opcodes.OpInstanceMigrate.OP_ID,
13240                           opcodes.OpInstanceReplaceDisks.OP_ID])
13241      })))
13242
13243   _NEVAC_MOVED = \
13244     ht.TListOf(ht.TAnd(ht.TIsLength(3),
13245                        ht.TItems([ht.TNonEmptyString,
13246                                   ht.TNonEmptyString,
13247                                   ht.TListOf(ht.TNonEmptyString),
13248                                  ])))
13249   _NEVAC_FAILED = \
13250     ht.TListOf(ht.TAnd(ht.TIsLength(2),
13251                        ht.TItems([ht.TNonEmptyString,
13252                                   ht.TMaybeString,
13253                                  ])))
13254   _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13255                           ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
13256
13257   _MODE_DATA = {
13258     constants.IALLOCATOR_MODE_ALLOC:
13259       (_AddNewInstance,
13260        [
13261         ("name", ht.TString),
13262         ("memory", ht.TInt),
13263         ("disks", ht.TListOf(ht.TDict)),
13264         ("disk_template", ht.TString),
13265         ("os", ht.TString),
13266         ("tags", _STRING_LIST),
13267         ("nics", ht.TListOf(ht.TDict)),
13268         ("vcpus", ht.TInt),
13269         ("hypervisor", ht.TString),
13270         ], ht.TList),
13271     constants.IALLOCATOR_MODE_RELOC:
13272       (_AddRelocateInstance,
13273        [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13274        ht.TList),
13275      constants.IALLOCATOR_MODE_NODE_EVAC:
13276       (_AddNodeEvacuate, [
13277         ("instances", _STRING_LIST),
13278         ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13279         ], _NEVAC_RESULT),
13280      constants.IALLOCATOR_MODE_CHG_GROUP:
13281       (_AddChangeGroup, [
13282         ("instances", _STRING_LIST),
13283         ("target_groups", _STRING_LIST),
13284         ], _NEVAC_RESULT),
13285     }
13286
13287   def Run(self, name, validate=True, call_fn=None):
13288     """Run an instance allocator and return the results.
13289
13290     """
13291     if call_fn is None:
13292       call_fn = self.rpc.call_iallocator_runner
13293
13294     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13295     result.Raise("Failure while running the iallocator script")
13296
13297     self.out_text = result.payload
13298     if validate:
13299       self._ValidateResult()
13300
13301   def _ValidateResult(self):
13302     """Process the allocator results.
13303
13304     This will process and if successful save the result in
13305     self.out_data and the other parameters.
13306
13307     """
13308     try:
13309       rdict = serializer.Load(self.out_text)
13310     except Exception, err:
13311       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13312
13313     if not isinstance(rdict, dict):
13314       raise errors.OpExecError("Can't parse iallocator results: not a dict")
13315
13316     # TODO: remove backwards compatiblity in later versions
13317     if "nodes" in rdict and "result" not in rdict:
13318       rdict["result"] = rdict["nodes"]
13319       del rdict["nodes"]
13320
13321     for key in "success", "info", "result":
13322       if key not in rdict:
13323         raise errors.OpExecError("Can't parse iallocator results:"
13324                                  " missing key '%s'" % key)
13325       setattr(self, key, rdict[key])
13326
13327     if not self._result_check(self.result):
13328       raise errors.OpExecError("Iallocator returned invalid result,"
13329                                " expected %s, got %s" %
13330                                (self._result_check, self.result),
13331                                errors.ECODE_INVAL)
13332
13333     if self.mode == constants.IALLOCATOR_MODE_RELOC:
13334       assert self.relocate_from is not None
13335       assert self.required_nodes == 1
13336
13337       node2group = dict((name, ndata["group"])
13338                         for (name, ndata) in self.in_data["nodes"].items())
13339
13340       fn = compat.partial(self._NodesToGroups, node2group,
13341                           self.in_data["nodegroups"])
13342
13343       instance = self.cfg.GetInstanceInfo(self.name)
13344       request_groups = fn(self.relocate_from + [instance.primary_node])
13345       result_groups = fn(rdict["result"] + [instance.primary_node])
13346
13347       if self.success and not set(result_groups).issubset(request_groups):
13348         raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13349                                  " differ from original groups (%s)" %
13350                                  (utils.CommaJoin(result_groups),
13351                                   utils.CommaJoin(request_groups)))
13352
13353     elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13354       assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13355
13356     self.out_data = rdict
13357
13358   @staticmethod
13359   def _NodesToGroups(node2group, groups, nodes):
13360     """Returns a list of unique group names for a list of nodes.
13361
13362     @type node2group: dict
13363     @param node2group: Map from node name to group UUID
13364     @type groups: dict
13365     @param groups: Group information
13366     @type nodes: list
13367     @param nodes: Node names
13368
13369     """
13370     result = set()
13371
13372     for node in nodes:
13373       try:
13374         group_uuid = node2group[node]
13375       except KeyError:
13376         # Ignore unknown node
13377         pass
13378       else:
13379         try:
13380           group = groups[group_uuid]
13381         except KeyError:
13382           # Can't find group, let's use UUID
13383           group_name = group_uuid
13384         else:
13385           group_name = group["name"]
13386
13387         result.add(group_name)
13388
13389     return sorted(result)
13390
13391
13392 class LUTestAllocator(NoHooksLU):
13393   """Run allocator tests.
13394
13395   This LU runs the allocator tests
13396
13397   """
13398   def CheckPrereq(self):
13399     """Check prerequisites.
13400
13401     This checks the opcode parameters depending on the director and mode test.
13402
13403     """
13404     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13405       for attr in ["memory", "disks", "disk_template",
13406                    "os", "tags", "nics", "vcpus"]:
13407         if not hasattr(self.op, attr):
13408           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13409                                      attr, errors.ECODE_INVAL)
13410       iname = self.cfg.ExpandInstanceName(self.op.name)
13411       if iname is not None:
13412         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13413                                    iname, errors.ECODE_EXISTS)
13414       if not isinstance(self.op.nics, list):
13415         raise errors.OpPrereqError("Invalid parameter 'nics'",
13416                                    errors.ECODE_INVAL)
13417       if not isinstance(self.op.disks, list):
13418         raise errors.OpPrereqError("Invalid parameter 'disks'",
13419                                    errors.ECODE_INVAL)
13420       for row in self.op.disks:
13421         if (not isinstance(row, dict) or
13422             constants.IDISK_SIZE not in row or
13423             not isinstance(row[constants.IDISK_SIZE], int) or
13424             constants.IDISK_MODE not in row or
13425             row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13426           raise errors.OpPrereqError("Invalid contents of the 'disks'"
13427                                      " parameter", errors.ECODE_INVAL)
13428       if self.op.hypervisor is None:
13429         self.op.hypervisor = self.cfg.GetHypervisorType()
13430     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13431       fname = _ExpandInstanceName(self.cfg, self.op.name)
13432       self.op.name = fname
13433       self.relocate_from = \
13434           list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
13435     elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
13436                           constants.IALLOCATOR_MODE_NODE_EVAC):
13437       if not self.op.instances:
13438         raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
13439       self.op.instances = _GetWantedInstances(self, self.op.instances)
13440     else:
13441       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
13442                                  self.op.mode, errors.ECODE_INVAL)
13443
13444     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13445       if self.op.allocator is None:
13446         raise errors.OpPrereqError("Missing allocator name",
13447                                    errors.ECODE_INVAL)
13448     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13449       raise errors.OpPrereqError("Wrong allocator test '%s'" %
13450                                  self.op.direction, errors.ECODE_INVAL)
13451
13452   def Exec(self, feedback_fn):
13453     """Run the allocator test.
13454
13455     """
13456     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13457       ial = IAllocator(self.cfg, self.rpc,
13458                        mode=self.op.mode,
13459                        name=self.op.name,
13460                        memory=self.op.memory,
13461                        disks=self.op.disks,
13462                        disk_template=self.op.disk_template,
13463                        os=self.op.os,
13464                        tags=self.op.tags,
13465                        nics=self.op.nics,
13466                        vcpus=self.op.vcpus,
13467                        hypervisor=self.op.hypervisor,
13468                        )
13469     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13470       ial = IAllocator(self.cfg, self.rpc,
13471                        mode=self.op.mode,
13472                        name=self.op.name,
13473                        relocate_from=list(self.relocate_from),
13474                        )
13475     elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13476       ial = IAllocator(self.cfg, self.rpc,
13477                        mode=self.op.mode,
13478                        instances=self.op.instances,
13479                        target_groups=self.op.target_groups)
13480     elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13481       ial = IAllocator(self.cfg, self.rpc,
13482                        mode=self.op.mode,
13483                        instances=self.op.instances,
13484                        evac_mode=self.op.evac_mode)
13485     else:
13486       raise errors.ProgrammerError("Uncatched mode %s in"
13487                                    " LUTestAllocator.Exec", self.op.mode)
13488
13489     if self.op.direction == constants.IALLOCATOR_DIR_IN:
13490       result = ial.in_text
13491     else:
13492       ial.Run(self.op.allocator, validate=False)
13493       result = ial.out_text
13494     return result
13495
13496
13497 #: Query type implementations
13498 _QUERY_IMPL = {
13499   constants.QR_INSTANCE: _InstanceQuery,
13500   constants.QR_NODE: _NodeQuery,
13501   constants.QR_GROUP: _GroupQuery,
13502   constants.QR_OS: _OsQuery,
13503   }
13504
13505 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
13506
13507
13508 def _GetQueryImplementation(name):
13509   """Returns the implemtnation for a query type.
13510
13511   @param name: Query type, must be one of L{constants.QR_VIA_OP}
13512
13513   """
13514   try:
13515     return _QUERY_IMPL[name]
13516   except KeyError:
13517     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
13518                                errors.ECODE_INVAL)