code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have way too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43 import operator
  44
  45 from ganeti import ssh
  46 from ganeti import utils
  47 from ganeti import errors
  48 from ganeti import hypervisor
  49 from ganeti import locking
  50 from ganeti import constants
  51 from ganeti import objects
  52 from ganeti import serializer
  53 from ganeti import ssconf
  54 from ganeti import uidpool
  55 from ganeti import compat
  56 from ganeti import masterd
  57 from ganeti import netutils
  58 from ganeti import query
  59 from ganeti import qlang
  60 from ganeti import opcodes
  61 from ganeti import ht
  62
  63 import ganeti.masterd.instance # pylint: disable=W0611
  64
  65
  66 class ResultWithJobs:
  67   """Data container for LU results with jobs.
  68
  69   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  70   by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  71   contained in the C{jobs} attribute and include the job IDs in the opcode
  72   result.
  73
  74   """
  75   def __init__(self, jobs, **kwargs):
  76     """Initializes this class.
  77
  78     Additional return values can be specified as keyword arguments.
  79
  80     @type jobs: list of lists of L{opcode.OpCode}
  81     @param jobs: A list of lists of opcode objects
  82
  83     """
  84     self.jobs = jobs
  85     self.other = kwargs
  86
  87
  88 class LogicalUnit(object):
  89   """Logical Unit base class.
  90
  91   Subclasses must follow these rules:
  92     - implement ExpandNames
  93     - implement CheckPrereq (except when tasklets are used)
  94     - implement Exec (except when tasklets are used)
  95     - implement BuildHooksEnv
  96     - implement BuildHooksNodes
  97     - redefine HPATH and HTYPE
  98     - optionally redefine their run requirements:
  99         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 100
 101   Note that all commands require root permissions.
 102
 103   @ivar dry_run_result: the value (if any) that will be returned to the caller
 104       in dry-run mode (signalled by opcode dry_run parameter)
 105
 106   """
 107   HPATH = None
 108   HTYPE = None
 109   REQ_BGL = True
 110
 111   def __init__(self, processor, op, context, rpc):
 112     """Constructor for LogicalUnit.
 113
 114     This needs to be overridden in derived classes in order to check op
 115     validity.
 116
 117     """
 118     self.proc = processor
 119     self.op = op
 120     self.cfg = context.cfg
 121     self.glm = context.glm
 122     # readability alias
 123     self.owned_locks = context.glm.list_owned
 124     self.context = context
 125     self.rpc = rpc
 126     # Dicts used to declare locking needs to mcpu
 127     self.needed_locks = None
 128     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 129     self.add_locks = {}
 130     self.remove_locks = {}
 131     # Used to force good behavior when calling helper functions
 132     self.recalculate_locks = {}
 133     # logging
 134     self.Log = processor.Log # pylint: disable=C0103
 135     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 136     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 137     self.LogStep = processor.LogStep # pylint: disable=C0103
 138     # support for dry-run
 139     self.dry_run_result = None
 140     # support for generic debug attribute
 141     if (not hasattr(self.op, "debug_level") or
 142         not isinstance(self.op.debug_level, int)):
 143       self.op.debug_level = 0
 144
 145     # Tasklets
 146     self.tasklets = None
 147
 148     # Validate opcode parameters and set defaults
 149     self.op.Validate(True)
 150
 151     self.CheckArguments()
 152
 153   def CheckArguments(self):
 154     """Check syntactic validity for the opcode arguments.
 155
 156     This method is for doing a simple syntactic check and ensure
 157     validity of opcode parameters, without any cluster-related
 158     checks. While the same can be accomplished in ExpandNames and/or
 159     CheckPrereq, doing these separate is better because:
 160
 161       - ExpandNames is left as as purely a lock-related function
 162       - CheckPrereq is run after we have acquired locks (and possible
 163         waited for them)
 164
 165     The function is allowed to change the self.op attribute so that
 166     later methods can no longer worry about missing parameters.
 167
 168     """
 169     pass
 170
 171   def ExpandNames(self):
 172     """Expand names for this LU.
 173
 174     This method is called before starting to execute the opcode, and it should
 175     update all the parameters of the opcode to their canonical form (e.g. a
 176     short node name must be fully expanded after this method has successfully
 177     completed). This way locking, hooks, logging, etc. can work correctly.
 178
 179     LUs which implement this method must also populate the self.needed_locks
 180     member, as a dict with lock levels as keys, and a list of needed lock names
 181     as values. Rules:
 182
 183       - use an empty dict if you don't need any lock
 184       - if you don't need any lock at a particular level omit that level
 185       - don't put anything for the BGL level
 186       - if you want all locks at a level use locking.ALL_SET as a value
 187
 188     If you need to share locks (rather than acquire them exclusively) at one
 189     level you can modify self.share_locks, setting a true value (usually 1) for
 190     that level. By default locks are not shared.
 191
 192     This function can also define a list of tasklets, which then will be
 193     executed in order instead of the usual LU-level CheckPrereq and Exec
 194     functions, if those are not defined by the LU.
 195
 196     Examples::
 197
 198       # Acquire all nodes and one instance
 199       self.needed_locks = {
 200         locking.LEVEL_NODE: locking.ALL_SET,
 201         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 202       }
 203       # Acquire just two nodes
 204       self.needed_locks = {
 205         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 206       }
 207       # Acquire no locks
 208       self.needed_locks = {} # No, you can't leave it to the default value None
 209
 210     """
 211     # The implementation of this method is mandatory only if the new LU is
 212     # concurrent, so that old LUs don't need to be changed all at the same
 213     # time.
 214     if self.REQ_BGL:
 215       self.needed_locks = {} # Exclusive LUs don't need locks.
 216     else:
 217       raise NotImplementedError
 218
 219   def DeclareLocks(self, level):
 220     """Declare LU locking needs for a level
 221
 222     While most LUs can just declare their locking needs at ExpandNames time,
 223     sometimes there's the need to calculate some locks after having acquired
 224     the ones before. This function is called just before acquiring locks at a
 225     particular level, but after acquiring the ones at lower levels, and permits
 226     such calculations. It can be used to modify self.needed_locks, and by
 227     default it does nothing.
 228
 229     This function is only called if you have something already set in
 230     self.needed_locks for the level.
 231
 232     @param level: Locking level which is going to be locked
 233     @type level: member of ganeti.locking.LEVELS
 234
 235     """
 236
 237   def CheckPrereq(self):
 238     """Check prerequisites for this LU.
 239
 240     This method should check that the prerequisites for the execution
 241     of this LU are fulfilled. It can do internode communication, but
 242     it should be idempotent - no cluster or system changes are
 243     allowed.
 244
 245     The method should raise errors.OpPrereqError in case something is
 246     not fulfilled. Its return value is ignored.
 247
 248     This method should also update all the parameters of the opcode to
 249     their canonical form if it hasn't been done by ExpandNames before.
 250
 251     """
 252     if self.tasklets is not None:
 253       for (idx, tl) in enumerate(self.tasklets):
 254         logging.debug("Checking prerequisites for tasklet %s/%s",
 255                       idx + 1, len(self.tasklets))
 256         tl.CheckPrereq()
 257     else:
 258       pass
 259
 260   def Exec(self, feedback_fn):
 261     """Execute the LU.
 262
 263     This method should implement the actual work. It should raise
 264     errors.OpExecError for failures that are somewhat dealt with in
 265     code, or expected.
 266
 267     """
 268     if self.tasklets is not None:
 269       for (idx, tl) in enumerate(self.tasklets):
 270         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 271         tl.Exec(feedback_fn)
 272     else:
 273       raise NotImplementedError
 274
 275   def BuildHooksEnv(self):
 276     """Build hooks environment for this LU.
 277
 278     @rtype: dict
 279     @return: Dictionary containing the environment that will be used for
 280       running the hooks for this LU. The keys of the dict must not be prefixed
 281       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 282       will extend the environment with additional variables. If no environment
 283       should be defined, an empty dictionary should be returned (not C{None}).
 284     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 285       will not be called.
 286
 287     """
 288     raise NotImplementedError
 289
 290   def BuildHooksNodes(self):
 291     """Build list of nodes to run LU's hooks.
 292
 293     @rtype: tuple; (list, list)
 294     @return: Tuple containing a list of node names on which the hook
 295       should run before the execution and a list of node names on which the
 296       hook should run after the execution. No nodes should be returned as an
 297       empty list (and not None).
 298     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 299       will not be called.
 300
 301     """
 302     raise NotImplementedError
 303
 304   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 305     """Notify the LU about the results of its hooks.
 306
 307     This method is called every time a hooks phase is executed, and notifies
 308     the Logical Unit about the hooks' result. The LU can then use it to alter
 309     its result based on the hooks.  By default the method does nothing and the
 310     previous result is passed back unchanged but any LU can define it if it
 311     wants to use the local cluster hook-scripts somehow.
 312
 313     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 314         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 315     @param hook_results: the results of the multi-node hooks rpc call
 316     @param feedback_fn: function used send feedback back to the caller
 317     @param lu_result: the previous Exec result this LU had, or None
 318         in the PRE phase
 319     @return: the new Exec result, based on the previous result
 320         and hook results
 321
 322     """
 323     # API must be kept, thus we ignore the unused argument and could
 324     # be a function warnings
 325     # pylint: disable=W0613,R0201
 326     return lu_result
 327
 328   def _ExpandAndLockInstance(self):
 329     """Helper function to expand and lock an instance.
 330
 331     Many LUs that work on an instance take its name in self.op.instance_name
 332     and need to expand it and then declare the expanded name for locking. This
 333     function does it, and then updates self.op.instance_name to the expanded
 334     name. It also initializes needed_locks as a dict, if this hasn't been done
 335     before.
 336
 337     """
 338     if self.needed_locks is None:
 339       self.needed_locks = {}
 340     else:
 341       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 342         "_ExpandAndLockInstance called with instance-level locks set"
 343     self.op.instance_name = _ExpandInstanceName(self.cfg,
 344                                                 self.op.instance_name)
 345     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 346
 347   def _LockInstancesNodes(self, primary_only=False):
 348     """Helper function to declare instances' nodes for locking.
 349
 350     This function should be called after locking one or more instances to lock
 351     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 352     with all primary or secondary nodes for instances already locked and
 353     present in self.needed_locks[locking.LEVEL_INSTANCE].
 354
 355     It should be called from DeclareLocks, and for safety only works if
 356     self.recalculate_locks[locking.LEVEL_NODE] is set.
 357
 358     In the future it may grow parameters to just lock some instance's nodes, or
 359     to just lock primaries or secondary nodes, if needed.
 360
 361     If should be called in DeclareLocks in a way similar to::
 362
 363       if level == locking.LEVEL_NODE:
 364         self._LockInstancesNodes()
 365
 366     @type primary_only: boolean
 367     @param primary_only: only lock primary nodes of locked instances
 368
 369     """
 370     assert locking.LEVEL_NODE in self.recalculate_locks, \
 371       "_LockInstancesNodes helper function called with no nodes to recalculate"
 372
 373     # TODO: check if we're really been called with the instance locks held
 374
 375     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 376     # future we might want to have different behaviors depending on the value
 377     # of self.recalculate_locks[locking.LEVEL_NODE]
 378     wanted_nodes = []
 379     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 380     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 381       wanted_nodes.append(instance.primary_node)
 382       if not primary_only:
 383         wanted_nodes.extend(instance.secondary_nodes)
 384
 385     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 386       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 387     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 388       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 389
 390     del self.recalculate_locks[locking.LEVEL_NODE]
 391
 392
 393 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 394   """Simple LU which runs no hooks.
 395
 396   This LU is intended as a parent for other LogicalUnits which will
 397   run no hooks, in order to reduce duplicate code.
 398
 399   """
 400   HPATH = None
 401   HTYPE = None
 402
 403   def BuildHooksEnv(self):
 404     """Empty BuildHooksEnv for NoHooksLu.
 405
 406     This just raises an error.
 407
 408     """
 409     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 410
 411   def BuildHooksNodes(self):
 412     """Empty BuildHooksNodes for NoHooksLU.
 413
 414     """
 415     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 416
 417
 418 class Tasklet:
 419   """Tasklet base class.
 420
 421   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 422   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 423   tasklets know nothing about locks.
 424
 425   Subclasses must follow these rules:
 426     - Implement CheckPrereq
 427     - Implement Exec
 428
 429   """
 430   def __init__(self, lu):
 431     self.lu = lu
 432
 433     # Shortcuts
 434     self.cfg = lu.cfg
 435     self.rpc = lu.rpc
 436
 437   def CheckPrereq(self):
 438     """Check prerequisites for this tasklets.
 439
 440     This method should check whether the prerequisites for the execution of
 441     this tasklet are fulfilled. It can do internode communication, but it
 442     should be idempotent - no cluster or system changes are allowed.
 443
 444     The method should raise errors.OpPrereqError in case something is not
 445     fulfilled. Its return value is ignored.
 446
 447     This method should also update all parameters to their canonical form if it
 448     hasn't been done before.
 449
 450     """
 451     pass
 452
 453   def Exec(self, feedback_fn):
 454     """Execute the tasklet.
 455
 456     This method should implement the actual work. It should raise
 457     errors.OpExecError for failures that are somewhat dealt with in code, or
 458     expected.
 459
 460     """
 461     raise NotImplementedError
 462
 463
 464 class _QueryBase:
 465   """Base for query utility classes.
 466
 467   """
 468   #: Attribute holding field definitions
 469   FIELDS = None
 470
 471   def __init__(self, filter_, fields, use_locking):
 472     """Initializes this class.
 473
 474     """
 475     self.use_locking = use_locking
 476
 477     self.query = query.Query(self.FIELDS, fields, filter_=filter_,
 478                              namefield="name")
 479     self.requested_data = self.query.RequestedData()
 480     self.names = self.query.RequestedNames()
 481
 482     # Sort only if no names were requested
 483     self.sort_by_name = not self.names
 484
 485     self.do_locking = None
 486     self.wanted = None
 487
 488   def _GetNames(self, lu, all_names, lock_level):
 489     """Helper function to determine names asked for in the query.
 490
 491     """
 492     if self.do_locking:
 493       names = lu.owned_locks(lock_level)
 494     else:
 495       names = all_names
 496
 497     if self.wanted == locking.ALL_SET:
 498       assert not self.names
 499       # caller didn't specify names, so ordering is not important
 500       return utils.NiceSort(names)
 501
 502     # caller specified names and we must keep the same order
 503     assert self.names
 504     assert not self.do_locking or lu.glm.is_owned(lock_level)
 505
 506     missing = set(self.wanted).difference(names)
 507     if missing:
 508       raise errors.OpExecError("Some items were removed before retrieving"
 509                                " their data: %s" % missing)
 510
 511     # Return expanded names
 512     return self.wanted
 513
 514   def ExpandNames(self, lu):
 515     """Expand names for this query.
 516
 517     See L{LogicalUnit.ExpandNames}.
 518
 519     """
 520     raise NotImplementedError()
 521
 522   def DeclareLocks(self, lu, level):
 523     """Declare locks for this query.
 524
 525     See L{LogicalUnit.DeclareLocks}.
 526
 527     """
 528     raise NotImplementedError()
 529
 530   def _GetQueryData(self, lu):
 531     """Collects all data for this query.
 532
 533     @return: Query data object
 534
 535     """
 536     raise NotImplementedError()
 537
 538   def NewStyleQuery(self, lu):
 539     """Collect data and execute query.
 540
 541     """
 542     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 543                                   sort_by_name=self.sort_by_name)
 544
 545   def OldStyleQuery(self, lu):
 546     """Collect data and execute query.
 547
 548     """
 549     return self.query.OldStyleQuery(self._GetQueryData(lu),
 550                                     sort_by_name=self.sort_by_name)
 551
 552
 553 def _ShareAll():
 554   """Returns a dict declaring all lock levels shared.
 555
 556   """
 557   return dict.fromkeys(locking.LEVELS, 1)
 558
 559
 560 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
 561   """Checks if the owned node groups are still correct for an instance.
 562
 563   @type cfg: L{config.ConfigWriter}
 564   @param cfg: The cluster configuration
 565   @type instance_name: string
 566   @param instance_name: Instance name
 567   @type owned_groups: set or frozenset
 568   @param owned_groups: List of currently owned node groups
 569
 570   """
 571   inst_groups = cfg.GetInstanceNodeGroups(instance_name)
 572
 573   if not owned_groups.issuperset(inst_groups):
 574     raise errors.OpPrereqError("Instance %s's node groups changed since"
 575                                " locks were acquired, current groups are"
 576                                " are '%s', owning groups '%s'; retry the"
 577                                " operation" %
 578                                (instance_name,
 579                                 utils.CommaJoin(inst_groups),
 580                                 utils.CommaJoin(owned_groups)),
 581                                errors.ECODE_STATE)
 582
 583   return inst_groups
 584
 585
 586 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 587   """Checks if the instances in a node group are still correct.
 588
 589   @type cfg: L{config.ConfigWriter}
 590   @param cfg: The cluster configuration
 591   @type group_uuid: string
 592   @param group_uuid: Node group UUID
 593   @type owned_instances: set or frozenset
 594   @param owned_instances: List of currently owned instances
 595
 596   """
 597   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 598   if owned_instances != wanted_instances:
 599     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 600                                " locks were acquired, wanted '%s', have '%s';"
 601                                " retry the operation" %
 602                                (group_uuid,
 603                                 utils.CommaJoin(wanted_instances),
 604                                 utils.CommaJoin(owned_instances)),
 605                                errors.ECODE_STATE)
 606
 607   return wanted_instances
 608
 609
 610 def _SupportsOob(cfg, node):
 611   """Tells if node supports OOB.
 612
 613   @type cfg: L{config.ConfigWriter}
 614   @param cfg: The cluster configuration
 615   @type node: L{objects.Node}
 616   @param node: The node
 617   @return: The OOB script if supported or an empty string otherwise
 618
 619   """
 620   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 621
 622
 623 def _GetWantedNodes(lu, nodes):
 624   """Returns list of checked and expanded node names.
 625
 626   @type lu: L{LogicalUnit}
 627   @param lu: the logical unit on whose behalf we execute
 628   @type nodes: list
 629   @param nodes: list of node names or None for all nodes
 630   @rtype: list
 631   @return: the list of nodes, sorted
 632   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 633
 634   """
 635   if nodes:
 636     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 637
 638   return utils.NiceSort(lu.cfg.GetNodeList())
 639
 640
 641 def _GetWantedInstances(lu, instances):
 642   """Returns list of checked and expanded instance names.
 643
 644   @type lu: L{LogicalUnit}
 645   @param lu: the logical unit on whose behalf we execute
 646   @type instances: list
 647   @param instances: list of instance names or None for all instances
 648   @rtype: list
 649   @return: the list of instances, sorted
 650   @raise errors.OpPrereqError: if the instances parameter is wrong type
 651   @raise errors.OpPrereqError: if any of the passed instances is not found
 652
 653   """
 654   if instances:
 655     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 656   else:
 657     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 658   return wanted
 659
 660
 661 def _GetUpdatedParams(old_params, update_dict,
 662                       use_default=True, use_none=False):
 663   """Return the new version of a parameter dictionary.
 664
 665   @type old_params: dict
 666   @param old_params: old parameters
 667   @type update_dict: dict
 668   @param update_dict: dict containing new parameter values, or
 669       constants.VALUE_DEFAULT to reset the parameter to its default
 670       value
 671   @param use_default: boolean
 672   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 673       values as 'to be deleted' values
 674   @param use_none: boolean
 675   @type use_none: whether to recognise C{None} values as 'to be
 676       deleted' values
 677   @rtype: dict
 678   @return: the new parameter dictionary
 679
 680   """
 681   params_copy = copy.deepcopy(old_params)
 682   for key, val in update_dict.iteritems():
 683     if ((use_default and val == constants.VALUE_DEFAULT) or
 684         (use_none and val is None)):
 685       try:
 686         del params_copy[key]
 687       except KeyError:
 688         pass
 689     else:
 690       params_copy[key] = val
 691   return params_copy
 692
 693
 694 def _ReleaseLocks(lu, level, names=None, keep=None):
 695   """Releases locks owned by an LU.
 696
 697   @type lu: L{LogicalUnit}
 698   @param level: Lock level
 699   @type names: list or None
 700   @param names: Names of locks to release
 701   @type keep: list or None
 702   @param keep: Names of locks to retain
 703
 704   """
 705   assert not (keep is not None and names is not None), \
 706          "Only one of the 'names' and the 'keep' parameters can be given"
 707
 708   if names is not None:
 709     should_release = names.__contains__
 710   elif keep:
 711     should_release = lambda name: name not in keep
 712   else:
 713     should_release = None
 714
 715   if should_release:
 716     retain = []
 717     release = []
 718
 719     # Determine which locks to release
 720     for name in lu.owned_locks(level):
 721       if should_release(name):
 722         release.append(name)
 723       else:
 724         retain.append(name)
 725
 726     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 727
 728     # Release just some locks
 729     lu.glm.release(level, names=release)
 730
 731     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 732   else:
 733     # Release everything
 734     lu.glm.release(level)
 735
 736     assert not lu.glm.is_owned(level), "No locks should be owned"
 737
 738
 739 def _MapInstanceDisksToNodes(instances):
 740   """Creates a map from (node, volume) to instance name.
 741
 742   @type instances: list of L{objects.Instance}
 743   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 744
 745   """
 746   return dict(((node, vol), inst.name)
 747               for inst in instances
 748               for (node, vols) in inst.MapLVsByNode().items()
 749               for vol in vols)
 750
 751
 752 def _RunPostHook(lu, node_name):
 753   """Runs the post-hook for an opcode on a single node.
 754
 755   """
 756   hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
 757   try:
 758     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 759   except:
 760     # pylint: disable=W0702
 761     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 762
 763
 764 def _CheckOutputFields(static, dynamic, selected):
 765   """Checks whether all selected fields are valid.
 766
 767   @type static: L{utils.FieldSet}
 768   @param static: static fields set
 769   @type dynamic: L{utils.FieldSet}
 770   @param dynamic: dynamic fields set
 771
 772   """
 773   f = utils.FieldSet()
 774   f.Extend(static)
 775   f.Extend(dynamic)
 776
 777   delta = f.NonMatching(selected)
 778   if delta:
 779     raise errors.OpPrereqError("Unknown output fields selected: %s"
 780                                % ",".join(delta), errors.ECODE_INVAL)
 781
 782
 783 def _CheckGlobalHvParams(params):
 784   """Validates that given hypervisor params are not global ones.
 785
 786   This will ensure that instances don't get customised versions of
 787   global params.
 788
 789   """
 790   used_globals = constants.HVC_GLOBALS.intersection(params)
 791   if used_globals:
 792     msg = ("The following hypervisor parameters are global and cannot"
 793            " be customized at instance level, please modify them at"
 794            " cluster level: %s" % utils.CommaJoin(used_globals))
 795     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 796
 797
 798 def _CheckNodeOnline(lu, node, msg=None):
 799   """Ensure that a given node is online.
 800
 801   @param lu: the LU on behalf of which we make the check
 802   @param node: the node to check
 803   @param msg: if passed, should be a message to replace the default one
 804   @raise errors.OpPrereqError: if the node is offline
 805
 806   """
 807   if msg is None:
 808     msg = "Can't use offline node"
 809   if lu.cfg.GetNodeInfo(node).offline:
 810     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 811
 812
 813 def _CheckNodeNotDrained(lu, node):
 814   """Ensure that a given node is not drained.
 815
 816   @param lu: the LU on behalf of which we make the check
 817   @param node: the node to check
 818   @raise errors.OpPrereqError: if the node is drained
 819
 820   """
 821   if lu.cfg.GetNodeInfo(node).drained:
 822     raise errors.OpPrereqError("Can't use drained node %s" % node,
 823                                errors.ECODE_STATE)
 824
 825
 826 def _CheckNodeVmCapable(lu, node):
 827   """Ensure that a given node is vm capable.
 828
 829   @param lu: the LU on behalf of which we make the check
 830   @param node: the node to check
 831   @raise errors.OpPrereqError: if the node is not vm capable
 832
 833   """
 834   if not lu.cfg.GetNodeInfo(node).vm_capable:
 835     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 836                                errors.ECODE_STATE)
 837
 838
 839 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 840   """Ensure that a node supports a given OS.
 841
 842   @param lu: the LU on behalf of which we make the check
 843   @param node: the node to check
 844   @param os_name: the OS to query about
 845   @param force_variant: whether to ignore variant errors
 846   @raise errors.OpPrereqError: if the node is not supporting the OS
 847
 848   """
 849   result = lu.rpc.call_os_get(node, os_name)
 850   result.Raise("OS '%s' not in supported OS list for node %s" %
 851                (os_name, node),
 852                prereq=True, ecode=errors.ECODE_INVAL)
 853   if not force_variant:
 854     _CheckOSVariant(result.payload, os_name)
 855
 856
 857 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 858   """Ensure that a node has the given secondary ip.
 859
 860   @type lu: L{LogicalUnit}
 861   @param lu: the LU on behalf of which we make the check
 862   @type node: string
 863   @param node: the node to check
 864   @type secondary_ip: string
 865   @param secondary_ip: the ip to check
 866   @type prereq: boolean
 867   @param prereq: whether to throw a prerequisite or an execute error
 868   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 869   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 870
 871   """
 872   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 873   result.Raise("Failure checking secondary ip on node %s" % node,
 874                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 875   if not result.payload:
 876     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 877            " please fix and re-run this command" % secondary_ip)
 878     if prereq:
 879       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 880     else:
 881       raise errors.OpExecError(msg)
 882
 883
 884 def _GetClusterDomainSecret():
 885   """Reads the cluster domain secret.
 886
 887   """
 888   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 889                                strict=True)
 890
 891
 892 def _CheckInstanceDown(lu, instance, reason):
 893   """Ensure that an instance is not running."""
 894   if instance.admin_up:
 895     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 896                                (instance.name, reason), errors.ECODE_STATE)
 897
 898   pnode = instance.primary_node
 899   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 900   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 901               prereq=True, ecode=errors.ECODE_ENVIRON)
 902
 903   if instance.name in ins_l.payload:
 904     raise errors.OpPrereqError("Instance %s is running, %s" %
 905                                (instance.name, reason), errors.ECODE_STATE)
 906
 907
 908 def _ExpandItemName(fn, name, kind):
 909   """Expand an item name.
 910
 911   @param fn: the function to use for expansion
 912   @param name: requested item name
 913   @param kind: text description ('Node' or 'Instance')
 914   @return: the resolved (full) name
 915   @raise errors.OpPrereqError: if the item is not found
 916
 917   """
 918   full_name = fn(name)
 919   if full_name is None:
 920     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 921                                errors.ECODE_NOENT)
 922   return full_name
 923
 924
 925 def _ExpandNodeName(cfg, name):
 926   """Wrapper over L{_ExpandItemName} for nodes."""
 927   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 928
 929
 930 def _ExpandInstanceName(cfg, name):
 931   """Wrapper over L{_ExpandItemName} for instance."""
 932   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 933
 934
 935 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 936                           memory, vcpus, nics, disk_template, disks,
 937                           bep, hvp, hypervisor_name, tags):
 938   """Builds instance related env variables for hooks
 939
 940   This builds the hook environment from individual variables.
 941
 942   @type name: string
 943   @param name: the name of the instance
 944   @type primary_node: string
 945   @param primary_node: the name of the instance's primary node
 946   @type secondary_nodes: list
 947   @param secondary_nodes: list of secondary nodes as strings
 948   @type os_type: string
 949   @param os_type: the name of the instance's OS
 950   @type status: boolean
 951   @param status: the should_run status of the instance
 952   @type memory: string
 953   @param memory: the memory size of the instance
 954   @type vcpus: string
 955   @param vcpus: the count of VCPUs the instance has
 956   @type nics: list
 957   @param nics: list of tuples (ip, mac, mode, link) representing
 958       the NICs the instance has
 959   @type disk_template: string
 960   @param disk_template: the disk template of the instance
 961   @type disks: list
 962   @param disks: the list of (size, mode) pairs
 963   @type bep: dict
 964   @param bep: the backend parameters for the instance
 965   @type hvp: dict
 966   @param hvp: the hypervisor parameters for the instance
 967   @type hypervisor_name: string
 968   @param hypervisor_name: the hypervisor for the instance
 969   @type tags: list
 970   @param tags: list of instance tags as strings
 971   @rtype: dict
 972   @return: the hook environment for this instance
 973
 974   """
 975   if status:
 976     str_status = "up"
 977   else:
 978     str_status = "down"
 979   env = {
 980     "OP_TARGET": name,
 981     "INSTANCE_NAME": name,
 982     "INSTANCE_PRIMARY": primary_node,
 983     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 984     "INSTANCE_OS_TYPE": os_type,
 985     "INSTANCE_STATUS": str_status,
 986     "INSTANCE_MEMORY": memory,
 987     "INSTANCE_VCPUS": vcpus,
 988     "INSTANCE_DISK_TEMPLATE": disk_template,
 989     "INSTANCE_HYPERVISOR": hypervisor_name,
 990   }
 991
 992   if nics:
 993     nic_count = len(nics)
 994     for idx, (ip, mac, mode, link) in enumerate(nics):
 995       if ip is None:
 996         ip = ""
 997       env["INSTANCE_NIC%d_IP" % idx] = ip
 998       env["INSTANCE_NIC%d_MAC" % idx] = mac
 999       env["INSTANCE_NIC%d_MODE" % idx] = mode
1000       env["INSTANCE_NIC%d_LINK" % idx] = link
1001       if mode == constants.NIC_MODE_BRIDGED:
1002         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1003   else:
1004     nic_count = 0
1005
1006   env["INSTANCE_NIC_COUNT"] = nic_count
1007
1008   if disks:
1009     disk_count = len(disks)
1010     for idx, (size, mode) in enumerate(disks):
1011       env["INSTANCE_DISK%d_SIZE" % idx] = size
1012       env["INSTANCE_DISK%d_MODE" % idx] = mode
1013   else:
1014     disk_count = 0
1015
1016   env["INSTANCE_DISK_COUNT"] = disk_count
1017
1018   if not tags:
1019     tags = []
1020
1021   env["INSTANCE_TAGS"] = " ".join(tags)
1022
1023   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1024     for key, value in source.items():
1025       env["INSTANCE_%s_%s" % (kind, key)] = value
1026
1027   return env
1028
1029
def _NICListToTuple(lu, nics):
  """Convert NIC objects into (ip, mac, mode, link) tuples.

  The resulting list is suitable to be passed to _BuildInstanceHookEnv or
  as a return value in LUInstanceQueryData. Mode and link are taken from
  the NIC parameters after filling them via the cluster object.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  cluster = lu.cfg.GetClusterInfo()

  def _ToTuple(nic):
    """Builds the tuple for a single NIC."""
    (ip, mac) = (nic.ip, nic.mac)
    filled = cluster.SimpleFillNIC(nic.nicparams)
    return (ip, mac, filled[constants.NIC_MODE], filled[constants.NIC_LINK])

  return [_ToTuple(nic) for nic in nics]
1052
1053
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds the instance hook environment from an instance object.

  The arguments for L{_BuildInstanceHookEnv} are derived from the instance
  and the cluster-filled backend/hypervisor parameters; entries given in
  C{override} replace the derived ones.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  be_params = cluster.FillBE(instance)
  hv_params = cluster.FillHV(instance)

  args = dict(
    name=instance.name,
    primary_node=instance.primary_node,
    secondary_nodes=instance.secondary_nodes,
    os_type=instance.os,
    status=instance.admin_up,
    memory=be_params[constants.BE_MEMORY],
    vcpus=be_params[constants.BE_VCPUS],
    nics=_NICListToTuple(lu, instance.nics),
    disk_template=instance.disk_template,
    disks=[(disk.size, disk.mode) for disk in instance.disks],
    bep=be_params,
    hvp=hv_params,
    hypervisor_name=instance.hypervisor,
    tags=instance.tags,
    )

  # Caller-supplied values win over what was derived from the object
  if override:
    args.update(override)

  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1091
1092
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  Nodes returned by the configuration's C{MaintainCandidatePool} are logged
  and re-added to the context; afterwards a note is logged if there are
  more candidates than desired.

  @param lu: the LU on behalf of which we adjust the pool
  @param exceptions: passed through to C{MaintainCandidatePool} and
      C{GetMasterCandidateStats}

  """
  promoted = lu.cfg.MaintainCandidatePool(exceptions)
  if promoted:
    promoted_names = utils.CommaJoin(node.name for node in promoted)
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               promoted_names)
    # The list holds node objects (see the .name access above)
    for node in promoted:
      lu.context.ReaddNode(node)

  (mc_now, mc_max, _) = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    note = ("Note: more nodes are candidates (%d) than desired (%d)" %
            (mc_now, mc_max))
    lu.LogInfo(note)
1107
1108
def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  @param lu: the LU on behalf of which the decision is made
  @param exceptions: passed through to C{GetMasterCandidateStats}
  @rtype: boolean
  @return: whether the current number of candidates is below the desired
      number once the new node is accounted for

  """
  pool_size = lu.cfg.GetClusterInfo().candidate_pool_size
  (current, desired, _) = lu.cfg.GetMasterCandidateStats(exceptions)
  # The new node raises the desired count by one, capped by the pool size
  target = min(desired + 1, pool_size)
  return current < target
1118
1119
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  Only NICs whose filled parameters select bridged mode are considered; if
  there are none, the node is not contacted at all.

  @param lu: the LU on behalf of which we make the check
  @param target_nics: the NIC objects to check
  @param target_node: the node on which the bridges must exist
  @raise errors.OpPrereqError: if the RPC result signals a failure

  """
  cluster = lu.cfg.GetClusterInfo()
  filled_params = [cluster.SimpleFillNIC(nic.nicparams)
                   for nic in target_nics]

  bridges = []
  for params in filled_params:
    if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.append(params[constants.NIC_LINK])

  if not bridges:
    return

  result = lu.rpc.call_bridges_exist(target_node, bridges)
  result.Raise("Error checking bridges on destination node '%s'" %
               target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1132
1133
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance whose NICs are checked
  @param node: the node to check on; defaults to the instance's primary node

  """
  target_node = node
  if target_node is None:
    target_node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, target_node)
1141
1142
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  An OS without supported variants must be named without a variant; an OS
  with supported variants must be named with one of them.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity
  @raise errors.OpPrereqError: if the name doesn't match the OS' variants

  """
  variant = objects.OS.GetVariant(name)

  if os_obj.supported_variants:
    if not variant:
      raise errors.OpPrereqError("OS name must include a variant",
                                 errors.ECODE_INVAL)
    if variant not in os_obj.supported_variants:
      raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
  elif variant:
    raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                               " passed)" % (os_obj.name, variant),
                               errors.ECODE_INVAL)
1165
1166
def _GetNodeInstancesInner(cfg, fn):
  """Returns the configured instances for which a predicate holds.

  @param cfg: configuration object providing C{GetAllInstancesInfo}
  @param fn: predicate called with each instance object
  @rtype: list
  @return: the instance objects for which C{fn} returned a true value

  """
  matching = []
  for inst in cfg.GetAllInstancesInfo().values():
    if fn(inst):
      matching.append(inst)
  return matching
1169
1170
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  @param cfg: configuration object to take the instances from
  @param node_name: name looked up in each instance's C{all_nodes}

  """
  def _UsesNode(inst):
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _UsesNode)
1177
1178
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  @param cfg: configuration object to take the instances from
  @param node_name: name compared against each instance's C{primary_node}

  """
  def _IsPrimary(inst):
    return node_name == inst.primary_node

  return _GetNodeInstancesInner(cfg, _IsPrimary)
1185
1186
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  @param cfg: configuration object to take the instances from
  @param node_name: name looked up in each instance's C{secondary_nodes}

  """
  def _IsSecondary(inst):
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondary)
1193
1194
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  @param cfg: configuration object providing the storage directories
  @param storage_type: the storage type to get the arguments for
  @rtype: list
  @return: an empty list, except for file storage, where it holds a single
      list with the file and shared file storage directories

  """
  if storage_type != constants.ST_FILE:
    return []

  # Special case for file storage: storage.FileStorage wants a list of
  # storage directories
  storage_dirs = [cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]
  return [storage_dirs]
1205
1206
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  """Returns the indices of an instance's disks reported faulty on a node.

  @param cfg: configuration object, used to set the disk IDs for the node
  @param rpc: RPC runner used to query the mirror status
  @param instance: the instance whose disks are checked
  @param node_name: the node to query
  @param prereq: passed to the RPC result's C{Raise}, selecting the kind of
      error raised on RPC failure
  @rtype: list
  @return: positions in the returned status list whose local disk status is
      L{constants.LDS_FAULTY} (presumably parallel to C{instance.disks})

  """
  for disk in instance.disks:
    cfg.SetDiskID(disk, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  # Entries evaluating to false carry no status and are skipped
  return [idx for (idx, status) in enumerate(result.payload)
          if status and status.ldisk_status == constants.LDS_FAULTY]
1222
1223
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @param lu: the LU whose opcode is checked (and possibly updated)
  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot
  @raise errors.OpPrereqError: if both slots are set, or if neither is set
      and there is no cluster-wide default iallocator

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      # Pass an error code like every other OpPrereqError raised here
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)
1254
1255
def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  The iallocator from the opcode is preferred; the cluster-wide default is
  only consulted when none was given.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name
  @raise errors.OpPrereqError: if neither the opcode nor the cluster
      provides an iallocator

  """
  chosen = iallocator or cfg.GetDefaultIAllocator()
  if chosen:
    return chosen

  raise errors.OpPrereqError("No iallocator was specified, neither in the"
                             " opcode nor as a cluster-wide default",
                             errors.ECODE_INVAL)
1277
1278
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: environment holding only the cluster name as C{OP_TARGET}

    """
    cluster_name = self.cfg.GetClusterName()
    return {
      "OP_TARGET": cluster_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: an empty node list and a list holding only the master node

    """
    master = self.cfg.GetMasterNode()
    return ([], [master])

  def Exec(self, feedback_fn):
    """Nothing to do.

    @return: always C{True}

    """
    return True
1305
1306
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: environment holding only the cluster name as C{OP_TARGET}

    """
    cluster_name = self.cfg.GetClusterName()
    return {
      "OP_TARGET": cluster_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: two empty node lists

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty, i.e. the master is the only
    node left and no instances are configured.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()
    nodelist = self.cfg.GetNodeList()

    only_master_left = (len(nodelist) == 1 and nodelist[0] == master)
    if not only_master_left:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)

    instancelist = self.cfg.GetInstanceList()
    if not instancelist:
      return

    raise errors.OpPrereqError("There are still %d instance(s) in"
                               " this cluster." % len(instancelist),
                               errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    @return: the name of the (former) master node

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    stop_result = self.rpc.call_node_stop_master(master, False)
    stop_result.Raise("Could not disable the master role")

    return master
1362
1363
1364 def _VerifyCertificate(filename):
1365   """Verifies a certificate for L{LUClusterVerifyConfig}.
1366
1367   @type filename: string
1368   @param filename: Path to PEM file
1369
1370   """
1371   try:
1372     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1373                                            utils.ReadFile(filename))
1374   except Exception, err: # pylint: disable=W0703
1375     return (LUClusterVerifyConfig.ETYPE_ERROR,
1376             "Failed to load X509 certificate %s: %s" % (filename, err))
1377
1378   (errcode, msg) = \
1379     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1380                                 constants.SSL_CERT_EXPIRATION_ERROR)
1381
1382   if msg:
1383     fnamemsg = "While verifying %s: %s" % (filename, msg)
1384   else:
1385     fnamemsg = None
1386
1387   if errcode is None:
1388     return (None, fnamemsg)
1389   elif errcode == utils.CERT_WARNING:
1390     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1391   elif errcode == utils.CERT_ERROR:
1392     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1393
1394   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1395
1396
def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  Parameters are collected from three origins, in this order: the cluster
  defaults of each enabled hypervisor, the non-empty per-OS overrides and
  the instances which have hypervisor parameters of their own.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = [("cluster", hv_name, cluster.GetHVDefaults(hv_name))
              for hv_name in cluster.enabled_hypervisors]

  for (os_name, os_hvp) in cluster.os_hvp.items():
    for (hv_name, hv_params) in os_hvp.items():
      if not hv_params:
        continue
      os_defaults = cluster.GetHVDefaults(hv_name, os_name=os_name)
      hvp_data.append(("os %s" % os_name, hv_name, os_defaults))

  # TODO: collapse identical parameter values in a single one
  hvp_data.extend([("instance %s" % inst.name, inst.hypervisor,
                    cluster.FillHV(inst))
                   for inst in instances
                   if inst.hvparams])

  return hvp_data
1427
1428
class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """
  # Item types an error can refer to
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  # Error codes, as (item type, error text) pairs
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  # Keyword argument selecting the severity, and its possible values
  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    @param ecode: one of the (item type, error text) pairs defined above
    @param item: the name of the affected item, or a false value for none
    @param msg: the message; formatted with C{args} if any are given
    @keyword code: the severity (L{ETYPE_ERROR} if not given)

    """
    severity = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    (item_type, error_text) = ecode

    # first complete the msg
    if args:
      msg = msg % args

    # then format the whole message
    if not self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      if item:
        item_suffix = " " + item
      else:
        item_suffix = ""
      report = "%s: %s%s: %s" % (severity, item_type, item_suffix, msg)
    else:
      report = "%s:%s:%s:%s:%s" % (severity, error_text, item_type, item, msg)

    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % report) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    The message is also logged when the opcode asks for simulated errors.
    Positional and keyword arguments are passed on to L{_Error}.

    """
    triggered = (bool(cond)
                 or self.op.debug_simulate_errors) # pylint: disable=E1101
    if triggered:
      self._Error(*args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    severity = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    if severity != self.ETYPE_ERROR:
      return
    self.bad = self.bad or triggered
1511
1512
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  If the opcode names a group, only that group is verified; otherwise one
  job verifying the global configuration is created, followed by one job
  per node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """No locks are requested by this LU."""
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Builds the verification jobs and returns them for submission.

    @return: a L{ResultWithJobs} holding one single-opcode job per
        verification step

    """
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([opcodes.OpClusterVerifyConfig()])

      # Always depend on global verification
      # NOTE: len(jobs) is read each time depends_fn is called, i.e. while
      # the generator below is being consumed, so the value follows the
      # growth of the job list (presumably a relative reference back to the
      # config verification job -- confirm against the job queue code)
      depends_fn = lambda: [(-len(jobs), [])]

    # The generator is evaluated lazily, one group at a time, by extend()
    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        # Only opcodes other than the group verification may fail here
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
1552
1553
1554 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1555   """Verifies the cluster config.
1556
1557   """
1558   REQ_BGL = True
1559
1560   def _VerifyHVP(self, hvp_data):
1561     """Verifies locally the syntax of the hypervisor parameters.
1562
1563     """
1564     for item, hv_name, hv_params in hvp_data:
1565       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1566              (item, hv_name))
1567       try:
1568         hv_class = hypervisor.GetHypervisor(hv_name)
1569         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1570         hv_class.CheckParameterSyntax(hv_params)
1571       except errors.GenericError, err:
1572         self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1573
1574   def ExpandNames(self):
1575     # Information can be safely retrieved as the BGL is acquired in exclusive
1576     # mode
1577     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1578     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1579     self.all_node_info = self.cfg.GetAllNodesInfo()
1580     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1581     self.needed_locks = {}
1582
1583   def Exec(self, feedback_fn):
1584     """Verify integrity of cluster, performing various test on nodes.
1585
1586     """
1587     self.bad = False
1588     self._feedback_fn = feedback_fn
1589
1590     feedback_fn("* Verifying cluster config")
1591
1592     for msg in self.cfg.VerifyConfig():
1593       self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1594
1595     feedback_fn("* Verifying cluster certificate files")
1596
1597     for cert_filename in constants.ALL_CERT_FILES:
1598       (errcode, msg) = _VerifyCertificate(cert_filename)
1599       self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1600
1601     feedback_fn("* Verifying hypervisor parameters")
1602
1603     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1604                                                 self.all_inst_info.values()))
1605
1606     feedback_fn("* Verifying all nodes belong to an existing group")
1607
1608     # We do this verification here because, should this bogus circumstance
1609     # occur, it would never be caught by VerifyGroup, which only acts on
1610     # nodes/instances reachable from existing node groups.
1611
1612     dangling_nodes = set(node.name for node in self.all_node_info.values()
1613                          if node.group not in self.all_group_info)
1614
1615     dangling_instances = {}
1616     no_node_instances = []
1617
1618     for inst in self.all_inst_info.values():
1619       if inst.primary_node in dangling_nodes:
1620         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1621       elif inst.primary_node not in self.all_node_info:
1622         no_node_instances.append(inst.name)
1623
1624     pretty_dangling = [
1625         "%s (%s)" %
1626         (node.name,
1627          utils.CommaJoin(dangling_instances.get(node.name,
1628                                                 ["no instances"])))
1629         for node in dangling_nodes]
1630
1631     self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1632                   "the following nodes (and their instances) belong to a non"
1633                   " existing group: %s", utils.CommaJoin(pretty_dangling))
1634
1635     self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1636                   "the following instances have a non-existing primary-node:"
1637                   " %s", utils.CommaJoin(no_node_instances))
1638
1639     return not self.bad
1640
1641
1642 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1643   """Verifies the status of a node group.
1644
1645   """
1646   HPATH = "cluster-verify"
1647   HTYPE = constants.HTYPE_CLUSTER
1648   REQ_BGL = False
1649
1650   _HOOKS_INDENT_RE = re.compile("^", re.M)
1651
1652   class NodeImage(object):
1653     """A class representing the logical and physical status of a node.
1654
1655     @type name: string
1656     @ivar name: the node name to which this object refers
1657     @ivar volumes: a structure as returned from
1658         L{ganeti.backend.GetVolumeList} (runtime)
1659     @ivar instances: a list of running instances (runtime)
1660     @ivar pinst: list of configured primary instances (config)
1661     @ivar sinst: list of configured secondary instances (config)
1662     @ivar sbp: dictionary of {primary-node: list of instances} for all
1663         instances for which this node is secondary (config)
1664     @ivar mfree: free memory, as reported by hypervisor (runtime)
1665     @ivar dfree: free disk, as reported by the node (runtime)
1666     @ivar offline: the offline status (config)
1667     @type rpc_fail: boolean
1668     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1669         not whether the individual keys were correct) (runtime)
1670     @type lvm_fail: boolean
1671     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1672     @type hyp_fail: boolean
1673     @ivar hyp_fail: whether the RPC call didn't return the instance list
1674     @type ghost: boolean
1675     @ivar ghost: whether this is a known node or not (config)
1676     @type os_fail: boolean
1677     @ivar os_fail: whether the RPC call didn't return valid OS data
1678     @type oslist: list
1679     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1680     @type vm_capable: boolean
1681     @ivar vm_capable: whether the node can host instances
1682
1683     """
1684     def __init__(self, offline=False, name=None, vm_capable=True):
1685       self.name = name
1686       self.volumes = {}
1687       self.instances = []
1688       self.pinst = []
1689       self.sinst = []
1690       self.sbp = {}
1691       self.mfree = 0
1692       self.dfree = 0
1693       self.offline = offline
1694       self.vm_capable = vm_capable
1695       self.rpc_fail = False
1696       self.lvm_fail = False
1697       self.hyp_fail = False
1698       self.ghost = False
1699       self.os_fail = False
1700       self.oslist = {}
1701
1702   def ExpandNames(self):
1703     # This raises errors.OpPrereqError on its own:
1704     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1705
1706     # Get instances in node group; this is unsafe and needs verification later
1707     inst_names = \
1708       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
1709
1710     self.needed_locks = {
1711       locking.LEVEL_INSTANCE: inst_names,
1712       locking.LEVEL_NODEGROUP: [self.group_uuid],
1713       locking.LEVEL_NODE: [],
1714       }
1715
1716     self.share_locks = _ShareAll()
1717
1718   def DeclareLocks(self, level):
1719     if level == locking.LEVEL_NODE:
1720       # Get members of node group; this is unsafe and needs verification later
1721       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1722
1723       all_inst_info = self.cfg.GetAllInstancesInfo()
1724
1725       # In Exec(), we warn about mirrored instances that have primary and
1726       # secondary living in separate node groups. To fully verify that
1727       # volumes for these instances are healthy, we will need to do an
1728       # extra call to their secondaries. We ensure here those nodes will
1729       # be locked.
1730       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1731         # Important: access only the instances whose lock is owned
1732         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1733           nodes.update(all_inst_info[inst].secondary_nodes)
1734
1735       self.needed_locks[locking.LEVEL_NODE] = nodes
1736
1737   def CheckPrereq(self):
1738     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1739     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1740
1741     group_nodes = set(self.group_info.members)
1742     group_instances = \
1743       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
1744
1745     unlocked_nodes = \
1746         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1747
1748     unlocked_instances = \
1749         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1750
1751     if unlocked_nodes:
1752       raise errors.OpPrereqError("Missing lock for nodes: %s" %
1753                                  utils.CommaJoin(unlocked_nodes),
1754                                  errors.ECODE_STATE)
1755
1756     if unlocked_instances:
1757       raise errors.OpPrereqError("Missing lock for instances: %s" %
1758                                  utils.CommaJoin(unlocked_instances),
1759                                  errors.ECODE_STATE)
1760
1761     self.all_node_info = self.cfg.GetAllNodesInfo()
1762     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1763
1764     self.my_node_names = utils.NiceSort(group_nodes)
1765     self.my_inst_names = utils.NiceSort(group_instances)
1766
1767     self.my_node_info = dict((name, self.all_node_info[name])
1768                              for name in self.my_node_names)
1769
1770     self.my_inst_info = dict((name, self.all_inst_info[name])
1771                              for name in self.my_inst_names)
1772
1773     # We detect here the nodes that will need the extra RPC calls for verifying
1774     # split LV volumes; they should be locked.
1775     extra_lv_nodes = set()
1776
1777     for inst in self.my_inst_info.values():
1778       if inst.disk_template in constants.DTS_INT_MIRROR:
1779         for nname in inst.all_nodes:
1780           if self.all_node_info[nname].group != self.group_uuid:
1781             extra_lv_nodes.add(nname)
1782
1783     unlocked_lv_nodes = \
1784         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1785
1786     if unlocked_lv_nodes:
1787       raise errors.OpPrereqError("Missing node locks for LV check: %s" %
1788                                  utils.CommaJoin(unlocked_lv_nodes),
1789                                  errors.ECODE_STATE)
1790     self.extra_lv_nodes = list(extra_lv_nodes)
1791
1792   def _VerifyNode(self, ninfo, nresult):
1793     """Perform some basic validation on data returned from a node.
1794
1795       - check the result data structure is well formed and has all the
1796         mandatory fields
1797       - check ganeti version
1798
1799     @type ninfo: L{objects.Node}
1800     @param ninfo: the node to check
1801     @param nresult: the results from the node
1802     @rtype: boolean
1803     @return: whether overall this call was successful (and we can expect
1804          reasonable values in the respose)
1805
1806     """
1807     node = ninfo.name
1808     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1809
1810     # main result, nresult should be a non-empty dict
1811     test = not nresult or not isinstance(nresult, dict)
1812     _ErrorIf(test, self.ENODERPC, node,
1813                   "unable to verify node: no data returned")
1814     if test:
1815       return False
1816
1817     # compares ganeti version
1818     local_version = constants.PROTOCOL_VERSION
1819     remote_version = nresult.get("version", None)
1820     test = not (remote_version and
1821                 isinstance(remote_version, (list, tuple)) and
1822                 len(remote_version) == 2)
1823     _ErrorIf(test, self.ENODERPC, node,
1824              "connection to node returned invalid data")
1825     if test:
1826       return False
1827
1828     test = local_version != remote_version[0]
1829     _ErrorIf(test, self.ENODEVERSION, node,
1830              "incompatible protocol versions: master %s,"
1831              " node %s", local_version, remote_version[0])
1832     if test:
1833       return False
1834
1835     # node seems compatible, we can actually try to look into its results
1836
1837     # full package version
1838     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1839                   self.ENODEVERSION, node,
1840                   "software version mismatch: master %s, node %s",
1841                   constants.RELEASE_VERSION, remote_version[1],
1842                   code=self.ETYPE_WARNING)
1843
1844     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1845     if ninfo.vm_capable and isinstance(hyp_result, dict):
1846       for hv_name, hv_result in hyp_result.iteritems():
1847         test = hv_result is not None
1848         _ErrorIf(test, self.ENODEHV, node,
1849                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1850
1851     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1852     if ninfo.vm_capable and isinstance(hvp_result, list):
1853       for item, hv_name, hv_result in hvp_result:
1854         _ErrorIf(True, self.ENODEHV, node,
1855                  "hypervisor %s parameter verify failure (source %s): %s",
1856                  hv_name, item, hv_result)
1857
1858     test = nresult.get(constants.NV_NODESETUP,
1859                        ["Missing NODESETUP results"])
1860     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1861              "; ".join(test))
1862
1863     return True
1864
1865   def _VerifyNodeTime(self, ninfo, nresult,
1866                       nvinfo_starttime, nvinfo_endtime):
1867     """Check the node time.
1868
1869     @type ninfo: L{objects.Node}
1870     @param ninfo: the node to check
1871     @param nresult: the remote results for the node
1872     @param nvinfo_starttime: the start time of the RPC call
1873     @param nvinfo_endtime: the end time of the RPC call
1874
1875     """
1876     node = ninfo.name
1877     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1878
1879     ntime = nresult.get(constants.NV_TIME, None)
1880     try:
1881       ntime_merged = utils.MergeTime(ntime)
1882     except (ValueError, TypeError):
1883       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1884       return
1885
1886     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1887       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1888     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1889       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1890     else:
1891       ntime_diff = None
1892
1893     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1894              "Node time diverges by at least %s from master node time",
1895              ntime_diff)
1896
1897   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1898     """Check the node LVM results.
1899
1900     @type ninfo: L{objects.Node}
1901     @param ninfo: the node to check
1902     @param nresult: the remote results for the node
1903     @param vg_name: the configured VG name
1904
1905     """
1906     if vg_name is None:
1907       return
1908
1909     node = ninfo.name
1910     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1911
1912     # checks vg existence and size > 20G
1913     vglist = nresult.get(constants.NV_VGLIST, None)
1914     test = not vglist
1915     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1916     if not test:
1917       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1918                                             constants.MIN_VG_SIZE)
1919       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1920
1921     # check pv names
1922     pvlist = nresult.get(constants.NV_PVLIST, None)
1923     test = pvlist is None
1924     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1925     if not test:
1926       # check that ':' is not present in PV names, since it's a
1927       # special character for lvcreate (denotes the range of PEs to
1928       # use on the PV)
1929       for _, pvname, owner_vg in pvlist:
1930         test = ":" in pvname
1931         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1932                  " '%s' of VG '%s'", pvname, owner_vg)
1933
1934   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1935     """Check the node bridges.
1936
1937     @type ninfo: L{objects.Node}
1938     @param ninfo: the node to check
1939     @param nresult: the remote results for the node
1940     @param bridges: the expected list of bridges
1941
1942     """
1943     if not bridges:
1944       return
1945
1946     node = ninfo.name
1947     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1948
1949     missing = nresult.get(constants.NV_BRIDGES, None)
1950     test = not isinstance(missing, list)
1951     _ErrorIf(test, self.ENODENET, node,
1952              "did not return valid bridge information")
1953     if not test:
1954       _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1955                utils.CommaJoin(sorted(missing)))
1956
1957   def _VerifyNodeNetwork(self, ninfo, nresult):
1958     """Check the node network connectivity results.
1959
1960     @type ninfo: L{objects.Node}
1961     @param ninfo: the node to check
1962     @param nresult: the remote results for the node
1963
1964     """
1965     node = ninfo.name
1966     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1967
1968     test = constants.NV_NODELIST not in nresult
1969     _ErrorIf(test, self.ENODESSH, node,
1970              "node hasn't returned node ssh connectivity data")
1971     if not test:
1972       if nresult[constants.NV_NODELIST]:
1973         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1974           _ErrorIf(True, self.ENODESSH, node,
1975                    "ssh communication with node '%s': %s", a_node, a_msg)
1976
1977     test = constants.NV_NODENETTEST not in nresult
1978     _ErrorIf(test, self.ENODENET, node,
1979              "node hasn't returned node tcp connectivity data")
1980     if not test:
1981       if nresult[constants.NV_NODENETTEST]:
1982         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1983         for anode in nlist:
1984           _ErrorIf(True, self.ENODENET, node,
1985                    "tcp communication with node '%s': %s",
1986                    anode, nresult[constants.NV_NODENETTEST][anode])
1987
1988     test = constants.NV_MASTERIP not in nresult
1989     _ErrorIf(test, self.ENODENET, node,
1990              "node hasn't returned node master IP reachability data")
1991     if not test:
1992       if not nresult[constants.NV_MASTERIP]:
1993         if node == self.master_node:
1994           msg = "the master node cannot reach the master IP (not configured?)"
1995         else:
1996           msg = "cannot reach the master IP"
1997         _ErrorIf(True, self.ENODENET, node, msg)
1998
1999   def _VerifyInstance(self, instance, instanceconfig, node_image,
2000                       diskstatus):
2001     """Verify an instance.
2002
2003     This function checks to see if the required block devices are
2004     available on the instance's node.
2005
2006     """
2007     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2008     node_current = instanceconfig.primary_node
2009
2010     node_vol_should = {}
2011     instanceconfig.MapLVsByNode(node_vol_should)
2012
2013     for node in node_vol_should:
2014       n_img = node_image[node]
2015       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2016         # ignore missing volumes on offline or broken nodes
2017         continue
2018       for volume in node_vol_should[node]:
2019         test = volume not in n_img.volumes
2020         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
2021                  "volume %s missing on node %s", volume, node)
2022
2023     if instanceconfig.admin_up:
2024       pri_img = node_image[node_current]
2025       test = instance not in pri_img.instances and not pri_img.offline
2026       _ErrorIf(test, self.EINSTANCEDOWN, instance,
2027                "instance not running on its primary node %s",
2028                node_current)
2029
2030     diskdata = [(nname, success, status, idx)
2031                 for (nname, disks) in diskstatus.items()
2032                 for idx, (success, status) in enumerate(disks)]
2033
2034     for nname, success, bdev_status, idx in diskdata:
2035       # the 'ghost node' construction in Exec() ensures that we have a
2036       # node here
2037       snode = node_image[nname]
2038       bad_snode = snode.ghost or snode.offline
2039       _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2040                self.EINSTANCEFAULTYDISK, instance,
2041                "couldn't retrieve status for disk/%s on %s: %s",
2042                idx, nname, bdev_status)
2043       _ErrorIf((instanceconfig.admin_up and success and
2044                 bdev_status.ldisk_status == constants.LDS_FAULTY),
2045                self.EINSTANCEFAULTYDISK, instance,
2046                "disk/%s on %s is faulty", idx, nname)
2047
2048   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2049     """Verify if there are any unknown volumes in the cluster.
2050
2051     The .os, .swap and backup volumes are ignored. All other volumes are
2052     reported as unknown.
2053
2054     @type reserved: L{ganeti.utils.FieldSet}
2055     @param reserved: a FieldSet of reserved volume names
2056
2057     """
2058     for node, n_img in node_image.items():
2059       if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2060           self.all_node_info[node].group != self.group_uuid):
2061         # skip non-healthy nodes
2062         continue
2063       for volume in n_img.volumes:
2064         test = ((node not in node_vol_should or
2065                 volume not in node_vol_should[node]) and
2066                 not reserved.Matches(volume))
2067         self._ErrorIf(test, self.ENODEORPHANLV, node,
2068                       "volume %s is unknown", volume)
2069
2070   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2071     """Verify N+1 Memory Resilience.
2072
2073     Check that if one single node dies we can still start all the
2074     instances it was primary for.
2075
2076     """
2077     cluster_info = self.cfg.GetClusterInfo()
2078     for node, n_img in node_image.items():
2079       # This code checks that every node which is now listed as
2080       # secondary has enough memory to host all instances it is
2081       # supposed to should a single other node in the cluster fail.
2082       # FIXME: not ready for failover to an arbitrary node
2083       # FIXME: does not support file-backed instances
2084       # WARNING: we currently take into account down instances as well
2085       # as up ones, considering that even if they're down someone
2086       # might want to start them even in the event of a node failure.
2087       if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2088         # we're skipping nodes marked offline and nodes in other groups from
2089         # the N+1 warning, since most likely we don't have good memory
2090         # infromation from them; we already list instances living on such
2091         # nodes, and that's enough warning
2092         continue
2093       for prinode, instances in n_img.sbp.items():
2094         needed_mem = 0
2095         for instance in instances:
2096           bep = cluster_info.FillBE(instance_cfg[instance])
2097           if bep[constants.BE_AUTO_BALANCE]:
2098             needed_mem += bep[constants.BE_MEMORY]
2099         test = n_img.mfree < needed_mem
2100         self._ErrorIf(test, self.ENODEN1, node,
2101                       "not enough memory to accomodate instance failovers"
2102                       " should node %s fail (%dMiB needed, %dMiB available)",
2103                       prinode, needed_mem, n_img.mfree)
2104
2105   @classmethod
2106   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2107                    (files_all, files_all_opt, files_mc, files_vm)):
2108     """Verifies file checksums collected from all nodes.
2109
2110     @param errorif: Callback for reporting errors
2111     @param nodeinfo: List of L{objects.Node} objects
2112     @param master_node: Name of master node
2113     @param all_nvinfo: RPC results
2114
2115     """
2116     assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2117             sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2118            "Found file listed in more than one file list"
2119
2120     # Define functions determining which nodes to consider for a file
2121     files2nodefn = [
2122       (files_all, None),
2123       (files_all_opt, None),
2124       (files_mc, lambda node: (node.master_candidate or
2125                                node.name == master_node)),
2126       (files_vm, lambda node: node.vm_capable),
2127       ]
2128
2129     # Build mapping from filename to list of nodes which should have the file
2130     nodefiles = {}
2131     for (files, fn) in files2nodefn:
2132       if fn is None:
2133         filenodes = nodeinfo
2134       else:
2135         filenodes = filter(fn, nodeinfo)
2136       nodefiles.update((filename,
2137                         frozenset(map(operator.attrgetter("name"), filenodes)))
2138                        for filename in files)
2139
2140     assert set(nodefiles) == (files_all | files_all_opt | files_mc | files_vm)
2141
2142     fileinfo = dict((filename, {}) for filename in nodefiles)
2143     ignore_nodes = set()
2144
2145     for node in nodeinfo:
2146       if node.offline:
2147         ignore_nodes.add(node.name)
2148         continue
2149
2150       nresult = all_nvinfo[node.name]
2151
2152       if nresult.fail_msg or not nresult.payload:
2153         node_files = None
2154       else:
2155         node_files = nresult.payload.get(constants.NV_FILELIST, None)
2156
2157       test = not (node_files and isinstance(node_files, dict))
2158       errorif(test, cls.ENODEFILECHECK, node.name,
2159               "Node did not return file checksum data")
2160       if test:
2161         ignore_nodes.add(node.name)
2162         continue
2163
2164       # Build per-checksum mapping from filename to nodes having it
2165       for (filename, checksum) in node_files.items():
2166         assert filename in nodefiles
2167         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2168
2169     for (filename, checksums) in fileinfo.items():
2170       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2171
2172       # Nodes having the file
2173       with_file = frozenset(node_name
2174                             for nodes in fileinfo[filename].values()
2175                             for node_name in nodes) - ignore_nodes
2176
2177       expected_nodes = nodefiles[filename] - ignore_nodes
2178
2179       # Nodes missing file
2180       missing_file = expected_nodes - with_file
2181
2182       if filename in files_all_opt:
2183         # All or no nodes
2184         errorif(missing_file and missing_file != expected_nodes,
2185                 cls.ECLUSTERFILECHECK, None,
2186                 "File %s is optional, but it must exist on all or no"
2187                 " nodes (not found on %s)",
2188                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2189       else:
2190         # Non-optional files
2191         errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2192                 "File %s is missing from node(s) %s", filename,
2193                 utils.CommaJoin(utils.NiceSort(missing_file)))
2194
2195         # Warn if a node has a file it shouldn't
2196         unexpected = with_file - expected_nodes
2197         errorif(unexpected,
2198                 cls.ECLUSTERFILECHECK, None,
2199                 "File %s should not exist on node(s) %s",
2200                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2201
2202       # See if there are multiple versions of the file
2203       test = len(checksums) > 1
2204       if test:
2205         variants = ["variant %s on %s" %
2206                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2207                     for (idx, (checksum, nodes)) in
2208                       enumerate(sorted(checksums.items()))]
2209       else:
2210         variants = []
2211
2212       errorif(test, cls.ECLUSTERFILECHECK, None,
2213               "File %s found with %s different checksums (%s)",
2214               filename, len(checksums), "; ".join(variants))
2215
2216   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2217                       drbd_map):
2218     """Verifies and the node DRBD status.
2219
2220     @type ninfo: L{objects.Node}
2221     @param ninfo: the node to check
2222     @param nresult: the remote results for the node
2223     @param instanceinfo: the dict of instances
2224     @param drbd_helper: the configured DRBD usermode helper
2225     @param drbd_map: the DRBD map as returned by
2226         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2227
2228     """
2229     node = ninfo.name
2230     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2231
2232     if drbd_helper:
2233       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2234       test = (helper_result == None)
2235       _ErrorIf(test, self.ENODEDRBDHELPER, node,
2236                "no drbd usermode helper returned")
2237       if helper_result:
2238         status, payload = helper_result
2239         test = not status
2240         _ErrorIf(test, self.ENODEDRBDHELPER, node,
2241                  "drbd usermode helper check unsuccessful: %s", payload)
2242         test = status and (payload != drbd_helper)
2243         _ErrorIf(test, self.ENODEDRBDHELPER, node,
2244                  "wrong drbd usermode helper: %s", payload)
2245
2246     # compute the DRBD minors
2247     node_drbd = {}
2248     for minor, instance in drbd_map[node].items():
2249       test = instance not in instanceinfo
2250       _ErrorIf(test, self.ECLUSTERCFG, None,
2251                "ghost instance '%s' in temporary DRBD map", instance)
2252         # ghost instance should not be running, but otherwise we
2253         # don't give double warnings (both ghost instance and
2254         # unallocated minor in use)
2255       if test:
2256         node_drbd[minor] = (instance, False)
2257       else:
2258         instance = instanceinfo[instance]
2259         node_drbd[minor] = (instance.name, instance.admin_up)
2260
2261     # and now check them
2262     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2263     test = not isinstance(used_minors, (tuple, list))
2264     _ErrorIf(test, self.ENODEDRBD, node,
2265              "cannot parse drbd status file: %s", str(used_minors))
2266     if test:
2267       # we cannot check drbd status
2268       return
2269
2270     for minor, (iname, must_exist) in node_drbd.items():
2271       test = minor not in used_minors and must_exist
2272       _ErrorIf(test, self.ENODEDRBD, node,
2273                "drbd minor %d of instance %s is not active", minor, iname)
2274     for minor in used_minors:
2275       test = minor not in node_drbd
2276       _ErrorIf(test, self.ENODEDRBD, node,
2277                "unallocated drbd minor %d is in use", minor)
2278
2279   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2280     """Builds the node OS structures.
2281
2282     @type ninfo: L{objects.Node}
2283     @param ninfo: the node to check
2284     @param nresult: the remote results for the node
2285     @param nimg: the node image object
2286
2287     """
2288     node = ninfo.name
2289     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2290
2291     remote_os = nresult.get(constants.NV_OSLIST, None)
2292     test = (not isinstance(remote_os, list) or
2293             not compat.all(isinstance(v, list) and len(v) == 7
2294                            for v in remote_os))
2295
2296     _ErrorIf(test, self.ENODEOS, node,
2297              "node hasn't returned valid OS data")
2298
2299     nimg.os_fail = test
2300
2301     if test:
2302       return
2303
2304     os_dict = {}
2305
2306     for (name, os_path, status, diagnose,
2307          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2308
2309       if name not in os_dict:
2310         os_dict[name] = []
2311
2312       # parameters is a list of lists instead of list of tuples due to
2313       # JSON lacking a real tuple type, fix it:
2314       parameters = [tuple(v) for v in parameters]
2315       os_dict[name].append((os_path, status, diagnose,
2316                             set(variants), set(parameters), set(api_ver)))
2317
2318     nimg.oslist = os_dict
2319
2320   def _VerifyNodeOS(self, ninfo, nimg, base):
2321     """Verifies the node OS list.
2322
2323     @type ninfo: L{objects.Node}
2324     @param ninfo: the node to check
2325     @param nimg: the node image object
2326     @param base: the 'template' node we match against (e.g. from the master)
2327
2328     """
2329     node = ninfo.name
2330     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2331
2332     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2333
2334     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2335     for os_name, os_data in nimg.oslist.items():
2336       assert os_data, "Empty OS status for OS %s?!" % os_name
2337       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2338       _ErrorIf(not f_status, self.ENODEOS, node,
2339                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2340       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2341                "OS '%s' has multiple entries (first one shadows the rest): %s",
2342                os_name, utils.CommaJoin([v[0] for v in os_data]))
2343       # comparisons with the 'base' image
2344       test = os_name not in base.oslist
2345       _ErrorIf(test, self.ENODEOS, node,
2346                "Extra OS %s not present on reference node (%s)",
2347                os_name, base.name)
2348       if test:
2349         continue
2350       assert base.oslist[os_name], "Base node has empty OS status?"
2351       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2352       if not b_status:
2353         # base OS is invalid, skipping
2354         continue
2355       for kind, a, b in [("API version", f_api, b_api),
2356                          ("variants list", f_var, b_var),
2357                          ("parameters", beautify_params(f_param),
2358                           beautify_params(b_param))]:
2359         _ErrorIf(a != b, self.ENODEOS, node,
2360                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2361                  kind, os_name, base.name,
2362                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2363
2364     # check any missing OSes
2365     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2366     _ErrorIf(missing, self.ENODEOS, node,
2367              "OSes present on reference node %s but missing on this node: %s",
2368              base.name, utils.CommaJoin(missing))
2369
2370   def _VerifyOob(self, ninfo, nresult):
2371     """Verifies out of band functionality of a node.
2372
2373     @type ninfo: L{objects.Node}
2374     @param ninfo: the node to check
2375     @param nresult: the remote results for the node
2376
2377     """
2378     node = ninfo.name
2379     # We just have to verify the paths on master and/or master candidates
2380     # as the oob helper is invoked on the master
2381     if ((ninfo.master_candidate or ninfo.master_capable) and
2382         constants.NV_OOB_PATHS in nresult):
2383       for path_result in nresult[constants.NV_OOB_PATHS]:
2384         self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2385
2386   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2387     """Verifies and updates the node volume data.
2388
2389     This function will update a L{NodeImage}'s internal structures
2390     with data from the remote call.
2391
2392     @type ninfo: L{objects.Node}
2393     @param ninfo: the node to check
2394     @param nresult: the remote results for the node
2395     @param nimg: the node image object
2396     @param vg_name: the configured VG name
2397
2398     """
2399     node = ninfo.name
2400     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2401
2402     nimg.lvm_fail = True
2403     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2404     if vg_name is None:
2405       pass
2406     elif isinstance(lvdata, basestring):
2407       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2408                utils.SafeEncode(lvdata))
2409     elif not isinstance(lvdata, dict):
2410       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2411     else:
2412       nimg.volumes = lvdata
2413       nimg.lvm_fail = False
2414
2415   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2416     """Verifies and updates the node instance list.
2417
2418     If the listing was successful, then updates this node's instance
2419     list. Otherwise, it marks the RPC call as failed for the instance
2420     list key.
2421
2422     @type ninfo: L{objects.Node}
2423     @param ninfo: the node to check
2424     @param nresult: the remote results for the node
2425     @param nimg: the node image object
2426
2427     """
2428     idata = nresult.get(constants.NV_INSTANCELIST, None)
2429     test = not isinstance(idata, list)
2430     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2431                   " (instancelist): %s", utils.SafeEncode(str(idata)))
2432     if test:
2433       nimg.hyp_fail = True
2434     else:
2435       nimg.instances = idata
2436
2437   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2438     """Verifies and computes a node information map
2439
2440     @type ninfo: L{objects.Node}
2441     @param ninfo: the node to check
2442     @param nresult: the remote results for the node
2443     @param nimg: the node image object
2444     @param vg_name: the configured VG name
2445
2446     """
2447     node = ninfo.name
2448     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2449
2450     # try to read free memory (from the hypervisor)
2451     hv_info = nresult.get(constants.NV_HVINFO, None)
2452     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2453     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2454     if not test:
2455       try:
2456         nimg.mfree = int(hv_info["memory_free"])
2457       except (ValueError, TypeError):
2458         _ErrorIf(True, self.ENODERPC, node,
2459                  "node returned invalid nodeinfo, check hypervisor")
2460
2461     # FIXME: devise a free space model for file based instances as well
2462     if vg_name is not None:
2463       test = (constants.NV_VGLIST not in nresult or
2464               vg_name not in nresult[constants.NV_VGLIST])
2465       _ErrorIf(test, self.ENODELVM, node,
2466                "node didn't return data for the volume group '%s'"
2467                " - it is either missing or broken", vg_name)
2468       if not test:
2469         try:
2470           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2471         except (ValueError, TypeError):
2472           _ErrorIf(True, self.ENODERPC, node,
2473                    "node returned invalid LVM info, check LVM status")
2474
2475   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2476     """Gets per-disk status information for all instances.
2477
2478     @type nodelist: list of strings
2479     @param nodelist: Node names
2480     @type node_image: dict of (name, L{objects.Node})
2481     @param node_image: Node objects
2482     @type instanceinfo: dict of (name, L{objects.Instance})
2483     @param instanceinfo: Instance objects
2484     @rtype: {instance: {node: [(succes, payload)]}}
2485     @return: a dictionary of per-instance dictionaries with nodes as
2486         keys and disk information as values; the disk information is a
2487         list of tuples (success, payload)
2488
2489     """
2490     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2491
2492     node_disks = {}
2493     node_disks_devonly = {}
2494     diskless_instances = set()
2495     diskless = constants.DT_DISKLESS
2496
2497     for nname in nodelist:
2498       node_instances = list(itertools.chain(node_image[nname].pinst,
2499                                             node_image[nname].sinst))
2500       diskless_instances.update(inst for inst in node_instances
2501                                 if instanceinfo[inst].disk_template == diskless)
2502       disks = [(inst, disk)
2503                for inst in node_instances
2504                for disk in instanceinfo[inst].disks]
2505
2506       if not disks:
2507         # No need to collect data
2508         continue
2509
2510       node_disks[nname] = disks
2511
2512       # Creating copies as SetDiskID below will modify the objects and that can
2513       # lead to incorrect data returned from nodes
2514       devonly = [dev.Copy() for (_, dev) in disks]
2515
2516       for dev in devonly:
2517         self.cfg.SetDiskID(dev, nname)
2518
2519       node_disks_devonly[nname] = devonly
2520
2521     assert len(node_disks) == len(node_disks_devonly)
2522
2523     # Collect data from all nodes with disks
2524     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2525                                                           node_disks_devonly)
2526
2527     assert len(result) == len(node_disks)
2528
2529     instdisk = {}
2530
2531     for (nname, nres) in result.items():
2532       disks = node_disks[nname]
2533
2534       if nres.offline:
2535         # No data from this node
2536         data = len(disks) * [(False, "node offline")]
2537       else:
2538         msg = nres.fail_msg
2539         _ErrorIf(msg, self.ENODERPC, nname,
2540                  "while getting disk information: %s", msg)
2541         if msg:
2542           # No data from this node
2543           data = len(disks) * [(False, msg)]
2544         else:
2545           data = []
2546           for idx, i in enumerate(nres.payload):
2547             if isinstance(i, (tuple, list)) and len(i) == 2:
2548               data.append(i)
2549             else:
2550               logging.warning("Invalid result from node %s, entry %d: %s",
2551                               nname, idx, i)
2552               data.append((False, "Invalid result from the remote node"))
2553
2554       for ((inst, _), status) in zip(disks, data):
2555         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2556
2557     # Add empty entries for diskless instances.
2558     for inst in diskless_instances:
2559       assert inst not in instdisk
2560       instdisk[inst] = {}
2561
2562     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2563                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2564                       compat.all(isinstance(s, (tuple, list)) and
2565                                  len(s) == 2 for s in statuses)
2566                       for inst, nnames in instdisk.items()
2567                       for nname, statuses in nnames.items())
2568     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2569
2570     return instdisk
2571
2572   @staticmethod
2573   def _SshNodeSelector(group_uuid, all_nodes):
2574     """Create endless iterators for all potential SSH check hosts.
2575
2576     """
2577     nodes = [node for node in all_nodes
2578              if (node.group != group_uuid and
2579                  not node.offline)]
2580     keyfunc = operator.attrgetter("group")
2581
2582     return map(itertools.cycle,
2583                [sorted(map(operator.attrgetter("name"), names))
2584                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2585                                                   keyfunc)])
2586
2587   @classmethod
2588   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2589     """Choose which nodes should talk to which other nodes.
2590
2591     We will make nodes contact all nodes in their group, and one node from
2592     every other group.
2593
2594     @warning: This algorithm has a known issue if one node group is much
2595       smaller than others (e.g. just one node). In such a case all other
2596       nodes will talk to the single node.
2597
2598     """
2599     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2600     sel = cls._SshNodeSelector(group_uuid, all_nodes)
2601
2602     return (online_nodes,
2603             dict((name, sorted([i.next() for i in sel]))
2604                  for name in online_nodes))
2605
2606   def BuildHooksEnv(self):
2607     """Build hooks env.
2608
2609     Cluster-Verify hooks just ran in the post phase and their failure makes
2610     the output be logged in the verify output and the verification to fail.
2611
2612     """
2613     env = {
2614       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2615       }
2616
2617     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2618                for node in self.my_node_info.values())
2619
2620     return env
2621
2622   def BuildHooksNodes(self):
2623     """Build hooks nodes.
2624
2625     """
2626     return ([], self.my_node_names)
2627
2628   def Exec(self, feedback_fn):
2629     """Verify integrity of the node group, performing various test on nodes.
2630
2631     """
2632     # This method has too many local variables. pylint: disable=R0914
2633     feedback_fn("* Verifying group '%s'" % self.group_info.name)
2634
2635     if not self.my_node_names:
2636       # empty node group
2637       feedback_fn("* Empty node group, skipping verification")
2638       return True
2639
2640     self.bad = False
2641     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2642     verbose = self.op.verbose
2643     self._feedback_fn = feedback_fn
2644
2645     vg_name = self.cfg.GetVGName()
2646     drbd_helper = self.cfg.GetDRBDHelper()
2647     cluster = self.cfg.GetClusterInfo()
2648     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2649     hypervisors = cluster.enabled_hypervisors
2650     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2651
2652     i_non_redundant = [] # Non redundant instances
2653     i_non_a_balanced = [] # Non auto-balanced instances
2654     n_offline = 0 # Count of offline nodes
2655     n_drained = 0 # Count of nodes being drained
2656     node_vol_should = {}
2657
2658     # FIXME: verify OS list
2659
2660     # File verification
2661     filemap = _ComputeAncillaryFiles(cluster, False)
2662
2663     # do local checksums
2664     master_node = self.master_node = self.cfg.GetMasterNode()
2665     master_ip = self.cfg.GetMasterIP()
2666
2667     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2668
2669     node_verify_param = {
2670       constants.NV_FILELIST:
2671         utils.UniqueSequence(filename
2672                              for files in filemap
2673                              for filename in files),
2674       constants.NV_NODELIST:
2675         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2676                                   self.all_node_info.values()),
2677       constants.NV_HYPERVISOR: hypervisors,
2678       constants.NV_HVPARAMS:
2679         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2680       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2681                                  for node in node_data_list
2682                                  if not node.offline],
2683       constants.NV_INSTANCELIST: hypervisors,
2684       constants.NV_VERSION: None,
2685       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2686       constants.NV_NODESETUP: None,
2687       constants.NV_TIME: None,
2688       constants.NV_MASTERIP: (master_node, master_ip),
2689       constants.NV_OSLIST: None,
2690       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2691       }
2692
2693     if vg_name is not None:
2694       node_verify_param[constants.NV_VGLIST] = None
2695       node_verify_param[constants.NV_LVLIST] = vg_name
2696       node_verify_param[constants.NV_PVLIST] = [vg_name]
2697       node_verify_param[constants.NV_DRBDLIST] = None
2698
2699     if drbd_helper:
2700       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2701
2702     # bridge checks
2703     # FIXME: this needs to be changed per node-group, not cluster-wide
2704     bridges = set()
2705     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2706     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2707       bridges.add(default_nicpp[constants.NIC_LINK])
2708     for instance in self.my_inst_info.values():
2709       for nic in instance.nics:
2710         full_nic = cluster.SimpleFillNIC(nic.nicparams)
2711         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2712           bridges.add(full_nic[constants.NIC_LINK])
2713
2714     if bridges:
2715       node_verify_param[constants.NV_BRIDGES] = list(bridges)
2716
2717     # Build our expected cluster state
2718     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2719                                                  name=node.name,
2720                                                  vm_capable=node.vm_capable))
2721                       for node in node_data_list)
2722
2723     # Gather OOB paths
2724     oob_paths = []
2725     for node in self.all_node_info.values():
2726       path = _SupportsOob(self.cfg, node)
2727       if path and path not in oob_paths:
2728         oob_paths.append(path)
2729
2730     if oob_paths:
2731       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2732
2733     for instance in self.my_inst_names:
2734       inst_config = self.my_inst_info[instance]
2735
2736       for nname in inst_config.all_nodes:
2737         if nname not in node_image:
2738           gnode = self.NodeImage(name=nname)
2739           gnode.ghost = (nname not in self.all_node_info)
2740           node_image[nname] = gnode
2741
2742       inst_config.MapLVsByNode(node_vol_should)
2743
2744       pnode = inst_config.primary_node
2745       node_image[pnode].pinst.append(instance)
2746
2747       for snode in inst_config.secondary_nodes:
2748         nimg = node_image[snode]
2749         nimg.sinst.append(instance)
2750         if pnode not in nimg.sbp:
2751           nimg.sbp[pnode] = []
2752         nimg.sbp[pnode].append(instance)
2753
2754     # At this point, we have the in-memory data structures complete,
2755     # except for the runtime information, which we'll gather next
2756
2757     # Due to the way our RPC system works, exact response times cannot be
2758     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2759     # time before and after executing the request, we can at least have a time
2760     # window.
2761     nvinfo_starttime = time.time()
2762     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2763                                            node_verify_param,
2764                                            self.cfg.GetClusterName())
2765     nvinfo_endtime = time.time()
2766
2767     if self.extra_lv_nodes and vg_name is not None:
2768       extra_lv_nvinfo = \
2769           self.rpc.call_node_verify(self.extra_lv_nodes,
2770                                     {constants.NV_LVLIST: vg_name},
2771                                     self.cfg.GetClusterName())
2772     else:
2773       extra_lv_nvinfo = {}
2774
2775     all_drbd_map = self.cfg.ComputeDRBDMap()
2776
2777     feedback_fn("* Gathering disk information (%s nodes)" %
2778                 len(self.my_node_names))
2779     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2780                                      self.my_inst_info)
2781
2782     feedback_fn("* Verifying configuration file consistency")
2783
2784     # If not all nodes are being checked, we need to make sure the master node
2785     # and a non-checked vm_capable node are in the list.
2786     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2787     if absent_nodes:
2788       vf_nvinfo = all_nvinfo.copy()
2789       vf_node_info = list(self.my_node_info.values())
2790       additional_nodes = []
2791       if master_node not in self.my_node_info:
2792         additional_nodes.append(master_node)
2793         vf_node_info.append(self.all_node_info[master_node])
2794       # Add the first vm_capable node we find which is not included
2795       for node in absent_nodes:
2796         nodeinfo = self.all_node_info[node]
2797         if nodeinfo.vm_capable and not nodeinfo.offline:
2798           additional_nodes.append(node)
2799           vf_node_info.append(self.all_node_info[node])
2800           break
2801       key = constants.NV_FILELIST
2802       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2803                                                  {key: node_verify_param[key]},
2804                                                  self.cfg.GetClusterName()))
2805     else:
2806       vf_nvinfo = all_nvinfo
2807       vf_node_info = self.my_node_info.values()
2808
2809     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2810
2811     feedback_fn("* Verifying node status")
2812
2813     refos_img = None
2814
2815     for node_i in node_data_list:
2816       node = node_i.name
2817       nimg = node_image[node]
2818
2819       if node_i.offline:
2820         if verbose:
2821           feedback_fn("* Skipping offline node %s" % (node,))
2822         n_offline += 1
2823         continue
2824
2825       if node == master_node:
2826         ntype = "master"
2827       elif node_i.master_candidate:
2828         ntype = "master candidate"
2829       elif node_i.drained:
2830         ntype = "drained"
2831         n_drained += 1
2832       else:
2833         ntype = "regular"
2834       if verbose:
2835         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2836
2837       msg = all_nvinfo[node].fail_msg
2838       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2839       if msg:
2840         nimg.rpc_fail = True
2841         continue
2842
2843       nresult = all_nvinfo[node].payload
2844
2845       nimg.call_ok = self._VerifyNode(node_i, nresult)
2846       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2847       self._VerifyNodeNetwork(node_i, nresult)
2848       self._VerifyOob(node_i, nresult)
2849
2850       if nimg.vm_capable:
2851         self._VerifyNodeLVM(node_i, nresult, vg_name)
2852         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2853                              all_drbd_map)
2854
2855         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2856         self._UpdateNodeInstances(node_i, nresult, nimg)
2857         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2858         self._UpdateNodeOS(node_i, nresult, nimg)
2859
2860         if not nimg.os_fail:
2861           if refos_img is None:
2862             refos_img = nimg
2863           self._VerifyNodeOS(node_i, nimg, refos_img)
2864         self._VerifyNodeBridges(node_i, nresult, bridges)
2865
2866         # Check whether all running instancies are primary for the node. (This
2867         # can no longer be done from _VerifyInstance below, since some of the
2868         # wrong instances could be from other node groups.)
2869         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2870
2871         for inst in non_primary_inst:
2872           test = inst in self.all_inst_info
2873           _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2874                    "instance should not run on node %s", node_i.name)
2875           _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2876                    "node is running unknown instance %s", inst)
2877
2878     for node, result in extra_lv_nvinfo.items():
2879       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2880                               node_image[node], vg_name)
2881
2882     feedback_fn("* Verifying instance status")
2883     for instance in self.my_inst_names:
2884       if verbose:
2885         feedback_fn("* Verifying instance %s" % instance)
2886       inst_config = self.my_inst_info[instance]
2887       self._VerifyInstance(instance, inst_config, node_image,
2888                            instdisk[instance])
2889       inst_nodes_offline = []
2890
2891       pnode = inst_config.primary_node
2892       pnode_img = node_image[pnode]
2893       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2894                self.ENODERPC, pnode, "instance %s, connection to"
2895                " primary node failed", instance)
2896
2897       _ErrorIf(inst_config.admin_up and pnode_img.offline,
2898                self.EINSTANCEBADNODE, instance,
2899                "instance is marked as running and lives on offline node %s",
2900                inst_config.primary_node)
2901
2902       # If the instance is non-redundant we cannot survive losing its primary
2903       # node, so we are not N+1 compliant. On the other hand we have no disk
2904       # templates with more than one secondary so that situation is not well
2905       # supported either.
2906       # FIXME: does not support file-backed instances
2907       if not inst_config.secondary_nodes:
2908         i_non_redundant.append(instance)
2909
2910       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2911                instance, "instance has multiple secondary nodes: %s",
2912                utils.CommaJoin(inst_config.secondary_nodes),
2913                code=self.ETYPE_WARNING)
2914
2915       if inst_config.disk_template in constants.DTS_INT_MIRROR:
2916         pnode = inst_config.primary_node
2917         instance_nodes = utils.NiceSort(inst_config.all_nodes)
2918         instance_groups = {}
2919
2920         for node in instance_nodes:
2921           instance_groups.setdefault(self.all_node_info[node].group,
2922                                      []).append(node)
2923
2924         pretty_list = [
2925           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2926           # Sort so that we always list the primary node first.
2927           for group, nodes in sorted(instance_groups.items(),
2928                                      key=lambda (_, nodes): pnode in nodes,
2929                                      reverse=True)]
2930
2931         self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2932                       instance, "instance has primary and secondary nodes in"
2933                       " different groups: %s", utils.CommaJoin(pretty_list),
2934                       code=self.ETYPE_WARNING)
2935
2936       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2937         i_non_a_balanced.append(instance)
2938
2939       for snode in inst_config.secondary_nodes:
2940         s_img = node_image[snode]
2941         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2942                  "instance %s, connection to secondary node failed", instance)
2943
2944         if s_img.offline:
2945           inst_nodes_offline.append(snode)
2946
2947       # warn that the instance lives on offline nodes
2948       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2949                "instance has offline secondary node(s) %s",
2950                utils.CommaJoin(inst_nodes_offline))
2951       # ... or ghost/non-vm_capable nodes
2952       for node in inst_config.all_nodes:
2953         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2954                  "instance lives on ghost node %s", node)
2955         _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2956                  instance, "instance lives on non-vm_capable node %s", node)
2957
2958     feedback_fn("* Verifying orphan volumes")
2959     reserved = utils.FieldSet(*cluster.reserved_lvs)
2960
2961     # We will get spurious "unknown volume" warnings if any node of this group
2962     # is secondary for an instance whose primary is in another group. To avoid
2963     # them, we find these instances and add their volumes to node_vol_should.
2964     for inst in self.all_inst_info.values():
2965       for secondary in inst.secondary_nodes:
2966         if (secondary in self.my_node_info
2967             and inst.name not in self.my_inst_info):
2968           inst.MapLVsByNode(node_vol_should)
2969           break
2970
2971     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2972
2973     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2974       feedback_fn("* Verifying N+1 Memory redundancy")
2975       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2976
2977     feedback_fn("* Other Notes")
2978     if i_non_redundant:
2979       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2980                   % len(i_non_redundant))
2981
2982     if i_non_a_balanced:
2983       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2984                   % len(i_non_a_balanced))
2985
2986     if n_offline:
2987       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2988
2989     if n_drained:
2990       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2991
2992     return not self.bad
2993
2994   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2995     """Analyze the post-hooks' result
2996
2997     This method analyses the hook result, handles it, and sends some
2998     nicely-formatted feedback back to the user.
2999
3000     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3001         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3002     @param hooks_results: the results of the multi-node hooks rpc call
3003     @param feedback_fn: function used send feedback back to the caller
3004     @param lu_result: previous Exec result
3005     @return: the new Exec result, based on the previous result
3006         and hook results
3007
3008     """
3009     # We only really run POST phase hooks, only for non-empty groups,
3010     # and are only interested in their results
3011     if not self.my_node_names:
3012       # empty node group
3013       pass
3014     elif phase == constants.HOOKS_PHASE_POST:
3015       # Used to change hooks' output to proper indentation
3016       feedback_fn("* Hooks Results")
3017       assert hooks_results, "invalid result from hooks"
3018
3019       for node_name in hooks_results:
3020         res = hooks_results[node_name]
3021         msg = res.fail_msg
3022         test = msg and not res.offline
3023         self._ErrorIf(test, self.ENODEHOOKS, node_name,
3024                       "Communication failure in hooks execution: %s", msg)
3025         if res.offline or msg:
3026           # No need to investigate payload if node is offline or gave
3027           # an error.
3028           continue
3029         for script, hkr, output in res.payload:
3030           test = hkr == constants.HKR_FAIL
3031           self._ErrorIf(test, self.ENODEHOOKS, node_name,
3032                         "Script %s failed, output:", script)
3033           if test:
3034             output = self._HOOKS_INDENT_RE.sub("      ", output)
3035             feedback_fn("%s" % output)
3036             lu_result = False
3037
3038     return lu_result
3039
3040
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  The verification itself is delegated: one L{opcodes.OpGroupVerifyDisks}
  job is submitted for every node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declares locks on all node groups.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {locking.LEVEL_NODEGROUP: locking.ALL_SET}

  def Exec(self, feedback_fn):
    """Submits one disk verification job per owned node group lock.

    @return: a L{ResultWithJobs} holding one single-opcode job per group

    """
    jobs = []

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    for group in self.owned_locks(locking.LEVEL_NODEGROUP):
      jobs.append([opcodes.OpGroupVerifyDisks(group_name=group)])

    return ResultWithJobs(jobs)
3059
3060
class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolves the group name and prepares the lock declarations.

    The instance, node group and node lock lists start out empty and
    are filled in by L{DeclareLocks}.

    """
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()

    deferred_levels = (locking.LEVEL_INSTANCE,
                       locking.LEVEL_NODEGROUP,
                       locking.LEVEL_NODE)
    # Every level gets its own, separate list
    self.needed_locks = dict((level, []) for level in deferred_levels)

  def DeclareLocks(self, level):
    """Computes the locks needed at the given locking level.

    @param level: the locking level for which locks are being declared

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
      self.needed_locks[locking.LEVEL_INSTANCE] = group_instances

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      # Lock all groups used by instances optimistically; this requires
      # going via the node before it's locked, requiring verification
      # later on
      wanted_groups = set([self.group_uuid])
      for instance_name in self.owned_locks(locking.LEVEL_INSTANCE):
        wanted_groups.update(self.cfg.GetInstanceNodeGroups(instance_name))

      self.needed_locks[locking.LEVEL_NODEGROUP] = wanted_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      group = self.cfg.GetNodeGroup(self.group_uuid)
      self.needed_locks[locking.LEVEL_NODE].extend(group.members)

  def CheckPrereq(self):
    """Checks that the optimistically acquired locks are still correct.

    The instance objects of all locked instances are stored in
    C{self.instances}, keyed by instance name.

    """
    lock_levels = (locking.LEVEL_INSTANCE,
                   locking.LEVEL_NODEGROUP,
                   locking.LEVEL_NODE)
    (locked_instances, locked_groups, locked_nodes) = \
      [frozenset(self.owned_locks(lvl)) for lvl in lock_levels]

    assert self.group_uuid in locked_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, locked_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(locked_instances))

    # Check if node groups for locked instances are still correct
    for instance_name in self.instances:
      inst = self.instances[instance_name]

      assert locked_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             locked_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: list of
        [node, volume] pairs for missing volumes)

    """
    node_errors = {}
    need_activation = set()
    missing_lvs = {}

    running = [inst for inst in self.instances.values() if inst.admin_up]
    expected_lvs = _MapInstanceDisksToNodes(running)

    if not expected_lvs:
      # Nothing to look for, so no node has to be asked
      return (node_errors, list(need_activation), missing_lvs)

    locked_nodes = set(self.owned_locks(locking.LEVEL_NODE))
    vm_capable = set(self.cfg.GetVmCapableNodeList())
    nodes = utils.NiceSort(locked_nodes & vm_capable)

    node_lvs = self.rpc.call_lv_list(nodes, [])

    for (node, node_res) in node_lvs.items():
      if node_res.offline:
        continue

      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        node_errors[node] = msg
        continue

      for (lv_name, (_, _, lv_online)) in node_res.payload.items():
        # Every LV found on the node is removed from the expected ones
        inst = expected_lvs.pop((node, lv_name), None)
        if inst is not None and not lv_online:
          need_activation.add(inst)

    # any leftover items in expected_lvs are missing LVs, let's arrange the
    # data better
    for (key, inst) in expected_lvs.items():
      if inst not in missing_lvs:
        missing_lvs[inst] = []
      missing_lvs[inst].append(list(key))

    return (node_errors, list(need_activation), missing_lvs)
3178
3179
3180 class LUClusterRepairDiskSizes(NoHooksLU):
3181   """Verifies the cluster disks sizes.
3182
3183   """
3184   REQ_BGL = False
3185
3186   def ExpandNames(self):
3187     if self.op.instances:
3188       self.wanted_names = _GetWantedInstances(self, self.op.instances)
3189       self.needed_locks = {
3190         locking.LEVEL_NODE: [],
3191         locking.LEVEL_INSTANCE: self.wanted_names,
3192         }
3193       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3194     else:
3195       self.wanted_names = None
3196       self.needed_locks = {
3197         locking.LEVEL_NODE: locking.ALL_SET,
3198         locking.LEVEL_INSTANCE: locking.ALL_SET,
3199         }
3200     self.share_locks = {
3201       locking.LEVEL_NODE: 1,
3202       locking.LEVEL_INSTANCE: 0,
3203       }
3204
3205   def DeclareLocks(self, level):
3206     if level == locking.LEVEL_NODE and self.wanted_names is not None:
3207       self._LockInstancesNodes(primary_only=True)
3208
3209   def CheckPrereq(self):
3210     """Check prerequisites.
3211
3212     This only checks the optional instance list against the existing names.
3213
3214     """
3215     if self.wanted_names is None:
3216       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3217
3218     self.wanted_instances = \
3219         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3220
3221   def _EnsureChildSizes(self, disk):
3222     """Ensure children of the disk have the needed disk size.
3223
3224     This is valid mainly for DRBD8 and fixes an issue where the
3225     children have smaller disk size.
3226
3227     @param disk: an L{ganeti.objects.Disk} object
3228
3229     """
3230     if disk.dev_type == constants.LD_DRBD8:
3231       assert disk.children, "Empty children for DRBD8?"
3232       fchild = disk.children[0]
3233       mismatch = fchild.size < disk.size
3234       if mismatch:
3235         self.LogInfo("Child disk has size %d, parent %d, fixing",
3236                      fchild.size, disk.size)
3237         fchild.size = disk.size
3238
3239       # and we recurse on this child only, not on the metadev
3240       return self._EnsureChildSizes(fchild) or mismatch
3241     else:
3242       return False
3243
3244   def Exec(self, feedback_fn):
3245     """Verify the size of cluster disks.
3246
3247     """
3248     # TODO: check child disks too
3249     # TODO: check differences in size between primary/secondary nodes
3250     per_node_disks = {}
3251     for instance in self.wanted_instances:
3252       pnode = instance.primary_node
3253       if pnode not in per_node_disks:
3254         per_node_disks[pnode] = []
3255       for idx, disk in enumerate(instance.disks):
3256         per_node_disks[pnode].append((instance, idx, disk))
3257
3258     changed = []
3259     for node, dskl in per_node_disks.items():
3260       newl = [v[2].Copy() for v in dskl]
3261       for dsk in newl:
3262         self.cfg.SetDiskID(dsk, node)
3263       result = self.rpc.call_blockdev_getsize(node, newl)
3264       if result.fail_msg:
3265         self.LogWarning("Failure in blockdev_getsize call to node"
3266                         " %s, ignoring", node)
3267         continue
3268       if len(result.payload) != len(dskl):
3269         logging.warning("Invalid result from node %s: len(dksl)=%d,"
3270                         " result.payload=%s", node, len(dskl), result.payload)
3271         self.LogWarning("Invalid result from node %s, ignoring node results",
3272                         node)
3273         continue
3274       for ((instance, idx, disk), size) in zip(dskl, result.payload):
3275         if size is None:
3276           self.LogWarning("Disk %d of instance %s did not return size"
3277                           " information, ignoring", idx, instance.name)
3278           continue
3279         if not isinstance(size, (int, long)):
3280           self.LogWarning("Disk %d of instance %s did not return valid"
3281                           " size information, ignoring", idx, instance.name)
3282           continue
3283         size = size >> 20
3284         if size != disk.size:
3285           self.LogInfo("Disk %d of instance %s has mismatched size,"
3286                        " correcting: recorded %d, actual %d", idx,
3287                        instance.name, disk.size, size)
3288           disk.size = size
3289           self.cfg.Update(instance, feedback_fn)
3290           changed.append((instance.name, idx, size))
3291         if self._EnsureChildSizes(disk):
3292           self.cfg.Update(instance, feedback_fn)
3293           changed.append((instance.name, idx, disk.size))
3294     return changed
3295
3296
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment holds the current cluster name and the requested
    new name.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: a tuple with the master node as only entry of the first
        list, and all nodes as second list

    """
    master = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetNodeList()
    return ([master], all_nodes)

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved, and the request is refused if neither name
    nor IP address would change, or if the new IP address answers on
    the node daemon port. On success the resolved name replaces
    C{self.op.name} and the resolved address is kept in C{self.ip}.

    """
    ip_family = self.cfg.GetPrimaryIPFamily()
    resolved = netutils.GetHostname(name=self.op.name, family=ip_family)

    wanted_name = resolved.name
    wanted_ip = resolved.ip
    self.ip = wanted_ip

    current_name = self.cfg.GetClusterName()
    current_ip = self.cfg.GetMasterIP()

    if wanted_name == current_name and wanted_ip == current_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)

    # a changed address must not be in use already
    if wanted_ip != current_ip and \
       netutils.TcpPing(wanted_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network" %
                                 wanted_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = wanted_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    The master role is stopped, the configuration and the known hosts
    file are updated and distributed, and the master role is started
    again even if one of these steps failed.

    @return: the new cluster name

    """
    new_name = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    stop_result = self.rpc.call_node_stop_master(master, False)
    stop_result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_name
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)

      # the file is pushed to all online nodes except the master
      targets = self.cfg.GetOnlineNodeList()
      if master in targets:
        targets.remove(master)
      _UploadHelper(self, targets, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      start_result = self.rpc.call_node_start_master(master, False, False)
      start_error = start_result.fail_msg
      if start_error:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s",
                        start_error)

    return new_name
3376
3377
3378 class LUClusterSetParams(LogicalUnit):
3379   """Change the parameters of the cluster.
3380
3381   """
3382   HPATH = "cluster-modify"
3383   HTYPE = constants.HTYPE_CLUSTER
3384   REQ_BGL = False
3385
3386   def CheckArguments(self):
3387     """Check parameters
3388
3389     """
3390     if self.op.uid_pool:
3391       uidpool.CheckUidPool(self.op.uid_pool)
3392
3393     if self.op.add_uids:
3394       uidpool.CheckUidPool(self.op.add_uids)
3395
3396     if self.op.remove_uids:
3397       uidpool.CheckUidPool(self.op.remove_uids)
3398
3399   def ExpandNames(self):
3400     # FIXME: in the future maybe other cluster params won't require checking on
3401     # all nodes to be modified.
3402     self.needed_locks = {
3403       locking.LEVEL_NODE: locking.ALL_SET,
3404     }
3405     self.share_locks[locking.LEVEL_NODE] = 1
3406
3407   def BuildHooksEnv(self):
3408     """Build hooks env.
3409
3410     """
3411     return {
3412       "OP_TARGET": self.cfg.GetClusterName(),
3413       "NEW_VG_NAME": self.op.vg_name,
3414       }
3415
3416   def BuildHooksNodes(self):
3417     """Build hooks nodes.
3418
3419     """
3420     mn = self.cfg.GetMasterNode()
3421     return ([mn], [mn])
3422
3423   def CheckPrereq(self):
3424     """Check prerequisites.
3425
3426     This checks whether the given params don't conflict and
3427     if the given volume group is valid.
3428
3429     """
3430     if self.op.vg_name is not None and not self.op.vg_name:
3431       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3432         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3433                                    " instances exist", errors.ECODE_INVAL)
3434
3435     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3436       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3437         raise errors.OpPrereqError("Cannot disable drbd helper while"
3438                                    " drbd-based instances exist",
3439                                    errors.ECODE_INVAL)
3440
3441     node_list = self.owned_locks(locking.LEVEL_NODE)
3442
3443     # if vg_name not None, checks given volume group on all nodes
3444     if self.op.vg_name:
3445       vglist = self.rpc.call_vg_list(node_list)
3446       for node in node_list:
3447         msg = vglist[node].fail_msg
3448         if msg:
3449           # ignoring down node
3450           self.LogWarning("Error while gathering data on node %s"
3451                           " (ignoring node): %s", node, msg)
3452           continue
3453         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3454                                               self.op.vg_name,
3455                                               constants.MIN_VG_SIZE)
3456         if vgstatus:
3457           raise errors.OpPrereqError("Error on node '%s': %s" %
3458                                      (node, vgstatus), errors.ECODE_ENVIRON)
3459
3460     if self.op.drbd_helper:
3461       # checks given drbd helper on all nodes
3462       helpers = self.rpc.call_drbd_helper(node_list)
3463       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3464         if ninfo.offline:
3465           self.LogInfo("Not checking drbd helper on offline node %s", node)
3466           continue
3467         msg = helpers[node].fail_msg
3468         if msg:
3469           raise errors.OpPrereqError("Error checking drbd helper on node"
3470                                      " '%s': %s" % (node, msg),
3471                                      errors.ECODE_ENVIRON)
3472         node_helper = helpers[node].payload
3473         if node_helper != self.op.drbd_helper:
3474           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3475                                      (node, node_helper), errors.ECODE_ENVIRON)
3476
3477     self.cluster = cluster = self.cfg.GetClusterInfo()
3478     # validate params changes
3479     if self.op.beparams:
3480       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3481       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3482
3483     if self.op.ndparams:
3484       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3485       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3486
3487       # TODO: we need a more general way to handle resetting
3488       # cluster-level parameters to default values
3489       if self.new_ndparams["oob_program"] == "":
3490         self.new_ndparams["oob_program"] = \
3491             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3492
3493     if self.op.nicparams:
3494       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3495       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3496       objects.NIC.CheckParameterSyntax(self.new_nicparams)
3497       nic_errors = []
3498
3499       # check all instances for consistency
3500       for instance in self.cfg.GetAllInstancesInfo().values():
3501         for nic_idx, nic in enumerate(instance.nics):
3502           params_copy = copy.deepcopy(nic.nicparams)
3503           params_filled = objects.FillDict(self.new_nicparams, params_copy)
3504
3505           # check parameter syntax
3506           try:
3507             objects.NIC.CheckParameterSyntax(params_filled)
3508           except errors.ConfigurationError, err:
3509             nic_errors.append("Instance %s, nic/%d: %s" %
3510                               (instance.name, nic_idx, err))
3511
3512           # if we're moving instances to routed, check that they have an ip
3513           target_mode = params_filled[constants.NIC_MODE]
3514           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3515             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3516                               " address" % (instance.name, nic_idx))
3517       if nic_errors:
3518         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3519                                    "\n".join(nic_errors))
3520
3521     # hypervisor list/parameters
3522     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3523     if self.op.hvparams:
3524       for hv_name, hv_dict in self.op.hvparams.items():
3525         if hv_name not in self.new_hvparams:
3526           self.new_hvparams[hv_name] = hv_dict
3527         else:
3528           self.new_hvparams[hv_name].update(hv_dict)
3529
3530     # os hypervisor parameters
3531     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3532     if self.op.os_hvp:
3533       for os_name, hvs in self.op.os_hvp.items():
3534         if os_name not in self.new_os_hvp:
3535           self.new_os_hvp[os_name] = hvs
3536         else:
3537           for hv_name, hv_dict in hvs.items():
3538             if hv_name not in self.new_os_hvp[os_name]:
3539               self.new_os_hvp[os_name][hv_name] = hv_dict
3540             else:
3541               self.new_os_hvp[os_name][hv_name].update(hv_dict)
3542
3543     # os parameters
3544     self.new_osp = objects.FillDict(cluster.osparams, {})
3545     if self.op.osparams:
3546       for os_name, osp in self.op.osparams.items():
3547         if os_name not in self.new_osp:
3548           self.new_osp[os_name] = {}
3549
3550         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3551                                                   use_none=True)
3552
3553         if not self.new_osp[os_name]:
3554           # we removed all parameters
3555           del self.new_osp[os_name]
3556         else:
3557           # check the parameter validity (remote check)
3558           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3559                          os_name, self.new_osp[os_name])
3560
3561     # changes to the hypervisor list
3562     if self.op.enabled_hypervisors is not None:
3563       self.hv_list = self.op.enabled_hypervisors
3564       for hv in self.hv_list:
3565         # if the hypervisor doesn't already exist in the cluster
3566         # hvparams, we initialize it to empty, and then (in both
3567         # cases) we make sure to fill the defaults, as we might not
3568         # have a complete defaults list if the hypervisor wasn't
3569         # enabled before
3570         if hv not in new_hvp:
3571           new_hvp[hv] = {}
3572         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3573         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3574     else:
3575       self.hv_list = cluster.enabled_hypervisors
3576
3577     if self.op.hvparams or self.op.enabled_hypervisors is not None:
3578       # either the enabled list has changed, or the parameters have, validate
3579       for hv_name, hv_params in self.new_hvparams.items():
3580         if ((self.op.hvparams and hv_name in self.op.hvparams) or
3581             (self.op.enabled_hypervisors and
3582              hv_name in self.op.enabled_hypervisors)):
3583           # either this is a new hypervisor, or its parameters have changed
3584           hv_class = hypervisor.GetHypervisor(hv_name)
3585           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3586           hv_class.CheckParameterSyntax(hv_params)
3587           _CheckHVParams(self, node_list, hv_name, hv_params)
3588
3589     if self.op.os_hvp:
3590       # no need to check any newly-enabled hypervisors, since the
3591       # defaults have already been checked in the above code-block
3592       for os_name, os_hvp in self.new_os_hvp.items():
3593         for hv_name, hv_params in os_hvp.items():
3594           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3595           # we need to fill in the new os_hvp on top of the actual hv_p
3596           cluster_defaults = self.new_hvparams.get(hv_name, {})
3597           new_osp = objects.FillDict(cluster_defaults, hv_params)
3598           hv_class = hypervisor.GetHypervisor(hv_name)
3599           hv_class.CheckParameterSyntax(new_osp)
3600           _CheckHVParams(self, node_list, hv_name, new_osp)
3601
3602     if self.op.default_iallocator:
3603       alloc_script = utils.FindFile(self.op.default_iallocator,
3604                                     constants.IALLOCATOR_SEARCH_PATH,
3605                                     os.path.isfile)
3606       if alloc_script is None:
3607         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3608                                    " specified" % self.op.default_iallocator,
3609                                    errors.ECODE_INVAL)
3610
3611   def Exec(self, feedback_fn):
3612     """Change the parameters of the cluster.
3613
3614     """
3615     if self.op.vg_name is not None:
3616       new_volume = self.op.vg_name
3617       if not new_volume:
3618         new_volume = None
3619       if new_volume != self.cfg.GetVGName():
3620         self.cfg.SetVGName(new_volume)
3621       else:
3622         feedback_fn("Cluster LVM configuration already in desired"
3623                     " state, not changing")
3624     if self.op.drbd_helper is not None:
3625       new_helper = self.op.drbd_helper
3626       if not new_helper:
3627         new_helper = None
3628       if new_helper != self.cfg.GetDRBDHelper():
3629         self.cfg.SetDRBDHelper(new_helper)
3630       else:
3631         feedback_fn("Cluster DRBD helper already in desired state,"
3632                     " not changing")
3633     if self.op.hvparams:
3634       self.cluster.hvparams = self.new_hvparams
3635     if self.op.os_hvp:
3636       self.cluster.os_hvp = self.new_os_hvp
3637     if self.op.enabled_hypervisors is not None:
3638       self.cluster.hvparams = self.new_hvparams
3639       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3640     if self.op.beparams:
3641       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3642     if self.op.nicparams:
3643       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3644     if self.op.osparams:
3645       self.cluster.osparams = self.new_osp
3646     if self.op.ndparams:
3647       self.cluster.ndparams = self.new_ndparams
3648
3649     if self.op.candidate_pool_size is not None:
3650       self.cluster.candidate_pool_size = self.op.candidate_pool_size
3651       # we need to update the pool size here, otherwise the save will fail
3652       _AdjustCandidatePool(self, [])
3653
3654     if self.op.maintain_node_health is not None:
3655       self.cluster.maintain_node_health = self.op.maintain_node_health
3656
3657     if self.op.prealloc_wipe_disks is not None:
3658       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3659
3660     if self.op.add_uids is not None:
3661       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3662
3663     if self.op.remove_uids is not None:
3664       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3665
3666     if self.op.uid_pool is not None:
3667       self.cluster.uid_pool = self.op.uid_pool
3668
3669     if self.op.default_iallocator is not None:
3670       self.cluster.default_iallocator = self.op.default_iallocator
3671
3672     if self.op.reserved_lvs is not None:
3673       self.cluster.reserved_lvs = self.op.reserved_lvs
3674
3675     def helper_os(aname, mods, desc):
3676       desc += " OS list"
3677       lst = getattr(self.cluster, aname)
3678       for key, val in mods:
3679         if key == constants.DDM_ADD:
3680           if val in lst:
3681             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3682           else:
3683             lst.append(val)
3684         elif key == constants.DDM_REMOVE:
3685           if val in lst:
3686             lst.remove(val)
3687           else:
3688             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3689         else:
3690           raise errors.ProgrammerError("Invalid modification '%s'" % key)
3691
3692     if self.op.hidden_os:
3693       helper_os("hidden_os", self.op.hidden_os, "hidden")
3694
3695     if self.op.blacklisted_os:
3696       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3697
3698     if self.op.master_netdev:
3699       master = self.cfg.GetMasterNode()
3700       feedback_fn("Shutting down master ip on the current netdev (%s)" %
3701                   self.cluster.master_netdev)
3702       result = self.rpc.call_node_stop_master(master, False)
3703       result.Raise("Could not disable the master ip")
3704       feedback_fn("Changing master_netdev from %s to %s" %
3705                   (self.cluster.master_netdev, self.op.master_netdev))
3706       self.cluster.master_netdev = self.op.master_netdev
3707
3708     self.cfg.Update(self.cluster, feedback_fn)
3709
3710     if self.op.master_netdev:
3711       feedback_fn("Starting the master ip on the new master netdev (%s)" %
3712                   self.op.master_netdev)
3713       result = self.rpc.call_node_start_master(master, False, False)
3714       if result.fail_msg:
3715         self.LogWarning("Could not re-enable the master ip on"
3716                         " the master, please restart manually: %s",
3717                         result.fail_msg)
3718
3719
def _UploadHelper(lu, nodes, fname):
  """Upload a file to a list of nodes, warning about failed copies.

  Nothing is done if the file does not exist locally.

  @param lu: the calling logical unit
  @param nodes: the nodes the file is uploaded to
  @param fname: path of the file to upload

  """
  if not os.path.exists(fname):
    return

  upload_results = lu.rpc.call_upload_file(nodes, fname)
  for node_name, node_result in upload_results.items():
    failure = node_result.fail_msg
    if not failure:
      continue
    lu.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                       (fname, node_name, failure))
3732
3733
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @param cluster: the cluster object; its C{modify_etc_hosts} and
      C{enabled_hypervisors} attributes are used
  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed
  @return: a tuple of four sets of file names: files for all nodes,
      files which must be on all nodes or on none, files for master
      candidates only and files for VM-capable nodes only

  """
  # Files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    ])

  if redist:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)
  else:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which must either exist on all nodes or on none
  files_all_opt = set([constants.RAPI_USERS_FILE])

  # Files which should only be on master candidates
  files_mc = set()
  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set()
  for hv_name in cluster.enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    files_vm.update(hv_class.GetAncillaryFiles())

  # Filenames must be unique, i.e. the union may not be smaller than
  # the sum of the individual sets
  all_sets = [files_all, files_all_opt, files_mc, files_vm]
  expected_total = sum(map(len, all_sets))
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
          expected_total), \
         "Found file listed in more than one file list"

  return (files_all, files_all_opt, files_mc, files_vm)
3779
3780
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  The master node is never a target of the upload.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Collect the nodes to upload to
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for target_list in (online_nodes, vm_nodes):
    try:
      target_list.remove(master_info.name)
    except ValueError:
      # master not in this list
      pass

  # Collect the files to upload
  (files_all, files_all_opt, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  # Pairs of (files, nodes receiving them)
  upload_plan = [
    (files_all, online_nodes),
    (files_all_opt, online_nodes),
    (files_vm, vm_nodes),
    ]

  for (fnames, targets) in upload_plan:
    for fname in fnames:
      _UploadHelper(lu, targets, fname)
3830
3831
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request the locks for all nodes.

    The node level is entered into C{share_locks} with value 1.

    """
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    The cluster object is passed unchanged to the configuration update,
    then the ancillary files are redistributed.

    @param feedback_fn: function passed on to the configuration update

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster, feedback_fn)
    _RedistributeAncillaryFiles(self)
3852
3853
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  The mirror status of the disks is queried on the instance's primary
  node until no device reports a sync in progress any more.

  @param lu: the calling logical unit
  @param instance: the instance whose disks are checked
  @param disks: the disks to check, passed through C{_ExpandCheckDisks};
      an empty list means there is nothing to wait for
  @param oneshot: if True, do not wait for the sync to finish; the
      status is only queried again for RPC failures and for the short
      retries done while a device looks degraded
  @return: False if, at the last query, a device was degraded without
      being in sync progress, True otherwise
  @raise errors.RemoteError: if the node could not be queried ten times
      in a row

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      # a degraded device which is resyncing is not counted as degraded
      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
          # without an estimate max_time would stay at zero and the
          # status would be polled again without any pause; sleep at
          # least a bit between retries
          max_time = 5
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    # never sleep more than a minute between two status queries
    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
3927
3928
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  The device itself is only queried if we are on the primary node or
  if the device is assembled on secondary nodes; its children are
  checked recursively (always using the is_degraded test) as long as
  no problem was found.

  @param lu: the calling logical unit
  @param dev: the disk to check
  @param node: the node on which the disk is checked
  @param on_primary: whether the node is the primary one
  @return: False if a problem was found, True otherwise

  """
  lu.cfg.SetDiskID(dev, node)

  consistent = True

  if on_primary or dev.AssembleOnSecondary():
    found = lu.rpc.call_blockdev_find(node, dev)
    failure = found.fail_msg
    if failure:
      lu.LogWarning("Can't find disk on node %s: %s", node, failure)
      consistent = False
    elif not found.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      consistent = False
    elif ldisk:
      consistent = found.payload.ldisk_status == constants.LDS_OKAY
    else:
      consistent = not found.payload.is_degraded

  if dev.children:
    for child in dev.children:
      if not consistent:
        # the remaining children are not looked at after a failure
        break
      consistent = _CheckDiskConsistency(lu, child, node, on_primary)

  return consistent
3961
3962
3963 class LUOobCommand(NoHooksLU):
3964   """Logical unit for OOB handling.
3965
3966   """
3967   REG_BGL = False
3968   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3969
3970   def ExpandNames(self):
3971     """Gather locks we need.
3972
3973     """
3974     if self.op.node_names:
3975       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3976       lock_names = self.op.node_names
3977     else:
3978       lock_names = locking.ALL_SET
3979
3980     self.needed_locks = {
3981       locking.LEVEL_NODE: lock_names,
3982       }
3983
3984   def CheckPrereq(self):
3985     """Check prerequisites.
3986
3987     This checks:
3988      - the node exists in the configuration
3989      - OOB is supported
3990
3991     Any errors are signaled by raising errors.OpPrereqError.
3992
3993     """
3994     self.nodes = []
3995     self.master_node = self.cfg.GetMasterNode()
3996
3997     assert self.op.power_delay >= 0.0
3998
3999     if self.op.node_names:
4000       if (self.op.command in self._SKIP_MASTER and
4001           self.master_node in self.op.node_names):
4002         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4003         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4004
4005         if master_oob_handler:
4006           additional_text = ("run '%s %s %s' if you want to operate on the"
4007                              " master regardless") % (master_oob_handler,
4008                                                       self.op.command,
4009                                                       self.master_node)
4010         else:
4011           additional_text = "it does not support out-of-band operations"
4012
4013         raise errors.OpPrereqError(("Operating on the master node %s is not"
4014                                     " allowed for %s; %s") %
4015                                    (self.master_node, self.op.command,
4016                                     additional_text), errors.ECODE_INVAL)
4017     else:
4018       self.op.node_names = self.cfg.GetNodeList()
4019       if self.op.command in self._SKIP_MASTER:
4020         self.op.node_names.remove(self.master_node)
4021
4022     if self.op.command in self._SKIP_MASTER:
4023       assert self.master_node not in self.op.node_names
4024
4025     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4026       if node is None:
4027         raise errors.OpPrereqError("Node %s not found" % node_name,
4028                                    errors.ECODE_NOENT)
4029       else:
4030         self.nodes.append(node)
4031
4032       if (not self.op.ignore_status and
4033           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4034         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4035                                     " not marked offline") % node_name,
4036                                    errors.ECODE_STATE)
4037
4038   def Exec(self, feedback_fn):
4039     """Execute OOB and return result if we expect any.
4040
4041     """
4042     master_node = self.master_node
4043     ret = []
4044
4045     for idx, node in enumerate(utils.NiceSort(self.nodes,
4046                                               key=lambda node: node.name)):
4047       node_entry = [(constants.RS_NORMAL, node.name)]
4048       ret.append(node_entry)
4049
4050       oob_program = _SupportsOob(self.cfg, node)
4051
4052       if not oob_program:
4053         node_entry.append((constants.RS_UNAVAIL, None))
4054         continue
4055
4056       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4057                    self.op.command, oob_program, node.name)
4058       result = self.rpc.call_run_oob(master_node, oob_program,
4059                                      self.op.command, node.name,
4060                                      self.op.timeout)
4061
4062       if result.fail_msg:
4063         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4064                         node.name, result.fail_msg)
4065         node_entry.append((constants.RS_NODATA, None))
4066       else:
4067         try:
4068           self._CheckPayload(result)
4069         except errors.OpExecError, err:
4070           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4071                           node.name, err)
4072           node_entry.append((constants.RS_NODATA, None))
4073         else:
4074           if self.op.command == constants.OOB_HEALTH:
4075             # For health we should log important events
4076             for item, status in result.payload:
4077               if status in [constants.OOB_STATUS_WARNING,
4078                             constants.OOB_STATUS_CRITICAL]:
4079                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4080                                 item, node.name, status)
4081
4082           if self.op.command == constants.OOB_POWER_ON:
4083             node.powered = True
4084           elif self.op.command == constants.OOB_POWER_OFF:
4085             node.powered = False
4086           elif self.op.command == constants.OOB_POWER_STATUS:
4087             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4088             if powered != node.powered:
4089               logging.warning(("Recorded power state (%s) of node '%s' does not"
4090                                " match actual power state (%s)"), node.powered,
4091                               node.name, powered)
4092
4093           # For configuration changing commands we should update the node
4094           if self.op.command in (constants.OOB_POWER_ON,
4095                                  constants.OOB_POWER_OFF):
4096             self.cfg.Update(node, feedback_fn)
4097
4098           node_entry.append((constants.RS_NORMAL, result.payload))
4099
4100           if (self.op.command == constants.OOB_POWER_ON and
4101               idx < len(self.nodes) - 1):
4102             time.sleep(self.op.power_delay)
4103
4104     return ret
4105
4106   def _CheckPayload(self, result):
4107     """Checks if the payload is valid.
4108
4109     @param result: RPC result
4110     @raises errors.OpExecError: If payload is not valid
4111
4112     """
4113     errs = []
4114     if self.op.command == constants.OOB_HEALTH:
4115       if not isinstance(result.payload, list):
4116         errs.append("command 'health' is expected to return a list but got %s" %
4117                     type(result.payload))
4118       else:
4119         for item, status in result.payload:
4120           if status not in constants.OOB_STATUSES:
4121             errs.append("health item '%s' has invalid status '%s'" %
4122                         (item, status))
4123
4124     if self.op.command == constants.OOB_POWER_STATUS:
4125       if not isinstance(result.payload, dict):
4126         errs.append("power-status is expected to return a dict but got %s" %
4127                     type(result.payload))
4128
4129     if self.op.command in [
4130         constants.OOB_POWER_ON,
4131         constants.OOB_POWER_OFF,
4132         constants.OOB_POWER_CYCLE,
4133         ]:
4134       if result.payload is not None:
4135         errs.append("%s is expected to not return payload but got '%s'" %
4136                     (self.op.command, result.payload))
4137
4138     if errs:
4139       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4140                                utils.CommaJoin(errs))
4141
4142
class _OsQuery(_QueryBase):
  """Query implementation for operating system definitions.

  """
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    """Declares the (empty) lock set and the wanted OS names.

    @param lu: the logical unit on whose behalf the query runs

    """
    # Shared locks on all nodes (share_locks[LEVEL_NODE] = 1 and
    # needed_locks[LEVEL_NODE] = ALL_SET) have been removed temporarily
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}

    # "wanted" and "do_locking" interact with _QueryBase._GetNames
    self.wanted = self.names or locking.ALL_SET
    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    """Does nothing, as no locks are declared for OS queries.

    """
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary.

    @param rlist: a map with node names as keys and RPC results as values;
        the payload of a result is a sequence of (name, path, status,
        diagnose, variants, parameters, api_versions) entries

    @rtype: dict
    @return: a dictionary with OS names as keys and as value another
        map, with node names as keys and lists of (path, status, diagnose,
        variants, parameters, api_versions) tuples as values; every node
        whose RPC did not fail has an entry (possibly an empty list) for
        every OS

    """
    # Only nodes which failed at RPC level are left out, so that nodes
    # with a non-responding node daemon don't make all OSes invalid
    responsive = [nname for (nname, nres) in rlist.items()
                  if not nres.fail_msg]

    by_os = {}
    for (node_name, node_res) in rlist.items():
      if node_res.fail_msg or not node_res.payload:
        continue

      for entry in node_res.payload:
        (os_name, path, status, diagnose, variants,
         params, api_versions) = entry

        if os_name not in by_os:
          # First time this OS is seen: start with an empty list for
          # each responsive node
          by_os[os_name] = dict((nname, []) for nname in responsive)

        # Parameters are converted from [name, help] to (name, help)
        by_os[os_name][node_name].append((path, status, diagnose, variants,
                                          [tuple(v) for v in params],
                                          api_versions))

    return by_os

  def _GetQueryData(self, lu):
    """Computes the list of OSes and their attributes.

    The OS definitions are collected from all nodes which are vm_capable
    and not offline. An OS is valid only if the first entry on every node
    has a true status; variants, parameters and API versions are reduced
    to the values common to all nodes.

    @param lu: the logical unit on whose behalf the query runs
    @rtype: list
    @return: L{query.OsInfo} objects in the order given by C{_GetNames}

    """
    # Locking is not used
    assert not compat.any(lu.glm.is_owned(level)
                          for level in locking.LEVELS
                          if level != locking.LEVEL_CLUSTER)
    assert not (self.do_locking or self.use_locking)

    candidates = [node.name
                  for node in lu.cfg.GetAllNodesInfo().values()
                  if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(candidates))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      common_variants = set()
      common_params = set()
      common_api = set()

      for (idx, node_oses) in enumerate(os_data.values()):
        info.valid = bool(info.valid and node_oses and node_oses[0][1])
        if not info.valid:
          break

        # Only the first entry found on each node is taken into account
        (node_variants, node_params, node_api) = node_oses[0][3:6]
        if idx:
          # Filter out inconsistent values
          common_variants.intersection_update(node_variants)
          common_params.intersection_update(node_params)
          common_api.intersection_update(node_api)
        else:
          # First entry
          common_variants.update(node_variants)
          common_params.update(node_params)
          common_api.update(node_api)

      info.variants = list(common_variants)
      info.parameters = list(common_params)
      info.api_versions = list(common_api)

      data[os_name] = info

    # Prepare data in requested order
    ordered_names = self._GetNames(lu, pol.keys(), None)
    return [data[name] for name in ordered_names if name in data]
4258
4259
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    @param fields: the requested output fields
    @param names: the OS names to restrict the query to
    @return: the combination of the name filter and the status filter,
        whichever of them is set, or None if neither is

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    conditions = []
    for fname in ("hidden", "blacklisted"):
      if fname not in fields:
        conditions.append([qlang.OP_NOT, [qlang.OP_TRUE, fname]])
    if "valid" not in fields:
      conditions.append([qlang.OP_TRUE, "valid"])

    if conditions:
      status_filter = [qlang.OP_AND] + conditions
    else:
      status_filter = None

    if not name_filter:
      return status_filter
    if status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    return name_filter

  def CheckArguments(self):
    """Creates the OS query object for the requested fields and names.

    """
    os_filter = self._BuildFilter(self.op.output_fields, self.op.names)
    self.oq = _OsQuery(os_filter, self.op.output_fields, False)

  def ExpandNames(self):
    """Delegates the lock declaration to the query object.

    """
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Runs the query and returns its old-style result.

    """
    return self.oq.OldStyleQuery(self)
4302
4303
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    target = self.op.node_name
    return {
      "OP_TARGET": target,
      "NODE_NAME": target,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: the list of all nodes except the one being removed, used
        for both hook phases

    """
    target = self.op.node_name
    remaining = self.cfg.GetNodeList()
    if target in remaining:
      remaining.remove(target)
    else:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", target)
    return (remaining, remaining)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    if self.cfg.GetMasterNode() == node.name:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    # No instance may use the node, neither as primary nor as secondary
    for (iname, inst) in self.cfg.GetAllInstancesInfo().items():
      if node.name in inst.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % iname,
                                   errors.ECODE_INVAL)

    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    @param feedback_fn: feedback function (not used here)

    """
    node_name = self.node.name
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node_name)

    # Read the setting before the configuration is modified below
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node_name])
    self.context.RemoveNode(node_name)

    # Run the post hooks on the node before asking it to leave the cluster
    _RunPostHook(self, node_name)

    leave_result = self.rpc.call_node_leave_cluster(node_name,
                                                    modify_ssh_setup)
    if leave_result.fail_msg:
      # Failures on the leaving node are only reported, not fatal
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", leave_result.fail_msg)

    if not self.cfg.GetClusterInfo().modify_etc_hosts:
      return

    # Remove node from our /etc/hosts
    hosts_result = self.rpc.call_etc_hosts_modify(self.cfg.GetMasterNode(),
                                                  constants.ETC_HOSTS_REMOVE,
                                                  node_name, None)
    hosts_result.Raise("Can't update hosts file with new host data")
    _RedistributeAncillaryFiles(self)
4394
4395
class _NodeQuery(_QueryBase):
  """Query implementation for nodes.

  """
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    """Computes the wanted nodes and declares the locks on them.

    @param lu: the logical unit on whose behalf the query runs

    """
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    live_requested = query.NQ_LIVE in self.requested_data
    self.do_locking = (self.use_locking and live_requested)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    """Does nothing; all locks are declared in L{ExpandNames}.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    Only the data groups present in C{self.requested_data} are gathered.

    @param lu: the logical unit on whose behalf the query runs
    @return: a L{query.NodeQueryData} object

    """
    requested = self.requested_data
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Live data, gathered via RPC
    if query.NQ_LIVE in requested:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = {}
      for (name, nresult) in node_data.items():
        # Failed RPCs and empty payloads leave no entry for the node
        if not nresult.fail_msg and nresult.payload:
          live_data[name] = nresult.payload
    else:
      live_data = None

    # Primary/secondary instances of the selected nodes
    if query.NQ_INST in requested:
      node_to_primary = dict((name, set()) for name in nodenames)
      node_to_secondary = dict((name, set()) for name in nodenames)

      for inst in lu.cfg.GetAllInstancesInfo().values():
        pnode = inst.primary_node
        if pnode in node_to_primary:
          node_to_primary[pnode].add(inst.name)
        for snode in inst.secondary_nodes:
          if snode in node_to_secondary:
            node_to_secondary[snode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    # Out-of-band support; computed for all nodes in the configuration
    if query.NQ_OOB in requested:
      oob_support = {}
      for (name, node) in all_info.iteritems():
        oob_support[name] = bool(_SupportsOob(lu.cfg, node))
    else:
      oob_support = None

    if query.NQ_GROUP in requested:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    selected = [all_info[name] for name in nodenames]
    return query.NodeQueryData(selected, live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
4470
4471
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Creates the node query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.nq = _NodeQuery(name_filter, self.op.output_fields,
                         self.op.use_locking)

  def ExpandNames(self):
    """Delegates the lock declaration to the query object.

    """
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Runs the query and returns its old-style result.

    """
    return self.nq.OldStyleQuery(self)
4488
4489
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    """Checks the selected output fields against the known ones.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Declares shared locks on the given nodes, or on all nodes.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    Offline nodes are skipped silently, nodes whose RPC failed are
    skipped with a warning.

    @rtype: list
    @return: one list per volume, holding the string value of each
        requested output field

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    vol2inst = _MapInstanceDisksToNodes(self.cfg.GetAllInstancesInfo().values())

    # One value getter per known field; evaluated only for requested fields
    getters = {
      "node": lambda node, vol: node,
      "phys": lambda node, vol: vol["dev"],
      "vg": lambda node, vol: vol["vg"],
      "name": lambda node, vol: vol["name"],
      "size": lambda node, vol: int(float(vol["size"])),
      "instance": lambda node, vol: vol2inst.get((node, vol["vg"] + "/" +
                                                  vol["name"]), "-"),
      }

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      if nresult.fail_msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node,
                        nresult.fail_msg)
        continue

      # Volumes are listed ordered by their physical device
      for vol in sorted(nresult.payload, key=operator.itemgetter("dev")):
        row = []
        for field in self.op.output_fields:
          try:
            getter = getters[field]
          except KeyError:
            raise errors.ParameterError(field)
          row.append(str(getter(node, vol)))

        output.append(row)

    return output
4557
4558
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    """Checks the selected output fields against the valid storage fields.

    """
    dynamic_fields = utils.FieldSet(*constants.VALID_STORAGE_FIELDS)
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=dynamic_fields,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Declares shared locks on the given nodes, or on all nodes.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    Offline nodes are skipped silently, nodes whose RPC failed are
    skipped with a warning.

    @rtype: list
    @return: one list per storage unit, holding the value of each
        requested output field; rows are ordered by node and unit name

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    requested = self.op.output_fields

    # Always get name to sort by
    if constants.SF_NAME in requested:
      fields = requested[:]
    else:
      fields = [constants.SF_NAME] + requested

    # Never ask for node or type as it's only known to the LU
    local_only = (constants.SF_NODE, constants.SF_TYPE)
    fields = [fname for fname in fields if fname not in local_only]

    field_idx = dict((fname, idx) for (idx, fname) in enumerate(fields))
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      if nresult.fail_msg:
        self.LogWarning("Can't get storage data from node %s: %s", node,
                        nresult.fail_msg)
        continue

      # Index the returned rows by unit name, to emit them sorted
      rows = dict((row[name_idx], row) for row in nresult.payload)

      for unit_name in utils.NiceSort(rows.keys()):
        row = rows[unit_name]
        out = []

        for field in requested:
          if field == constants.SF_NODE:
            out.append(node)
          elif field == constants.SF_TYPE:
            out.append(self.op.storage_type)
          elif field in field_idx:
            out.append(row[field_idx[field]])
          else:
            raise errors.ParameterError(field)

        result.append(out)

    return result
4640
4641
class _InstanceQuery(_QueryBase):
  """Query implementation for instances.

  """
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    """Computes the wanted instances and declares the needed lock levels.

    @param lu: the logical unit on whose behalf the query runs

    """
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    live_requested = query.IQ_LIVE in self.requested_data
    self.do_locking = (self.use_locking and live_requested)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      # Group and node locks are filled in by DeclareLocks
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    """Declares the node group and node locks of the locked instances.

    @param lu: the logical unit on whose behalf the query runs
    @param level: the lock level being declared

    """
    if not self.do_locking:
      return

    if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
      assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

      # Lock all groups used by instances optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      group_uuids = set()
      for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE):
        group_uuids.update(lu.cfg.GetInstanceNodeGroups(instance_name))
      lu.needed_locks[locking.LEVEL_NODEGROUP] = group_uuids
    elif level == locking.LEVEL_NODE:
      lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    """Verifies the owned group locks against the locked instances.

    @param lu: the logical unit owning the locks

    """
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    Only the data groups present in C{self.requested_data} are gathered.

    @param lu: the logical unit on whose behalf the query runs
    @return: a L{query.InstanceQueryData} object

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    requested = self.requested_data
    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    rpc_nodes = frozenset(node_name
                          for inst in instance_list
                          for node_name in inst.all_nodes)
    hv_list = list(set(inst.hypervisor for inst in instance_list))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()
    live_data = {}

    # Live data is needed for the console information as well
    if requested & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      node_data = lu.rpc.call_all_instances_info(rpc_nodes, hv_list)
      for name in rpc_nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)

        if result.fail_msg:
          bad_nodes.append(name)
          continue

        if not result.payload:
          # no instance is alive on this node
          continue

        for inst_name in result.payload:
          if inst_name not in all_info:
            # orphan instance; we don't list it here as we don't
            # handle this case yet in the output of instance listing
            logging.warning("Orphan instance '%s' found on node %s",
                            inst_name, name)
            continue

          if all_info[inst_name].primary_node == name:
            # NOTE(review): this merges the node's whole payload, not only
            # this instance's entry - confirm this is intended
            live_data.update(result.payload)
          else:
            wrongnode_inst.add(inst_name)

    if query.IQ_DISKUSAGE in requested:
      disk_usage = {}
      for inst in instance_list:
        disk_sizes = [{constants.IDISK_SIZE: disk.size}
                      for disk in inst.disks]
        disk_usage[inst.name] = _ComputeDiskSize(inst.disk_template,
                                                 disk_sizes)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in requested:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in requested:
      node_names = set(node_name
                       for inst in instance_list
                       for node_name in inst.all_nodes)
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      group_uuids = set(node.group for node in nodes.values())
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in group_uuids)
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)
4772
4773
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Instantiates the query implementation for the requested kind.

    """
    impl_cls = _GetQueryImplementation(self.op.what)
    self.impl = impl_cls(self.op.filter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    """Delegates the lock declaration to the query implementation.

    """
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    """Delegates the per-level lock declaration to the implementation.

    """
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and returns its new-style result.

    """
    return self.impl.NewStyleQuery(self)
4794
4795
class LUQueryFields(NoHooksLU):
  """Query for the fields available for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Looks up the query implementation for the requested kind.

    """
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    """Declares that no locks are needed.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Queries the field definitions of the implementation.

    """
    field_defs = self.qcls.FIELDS
    return query.QueryFields(field_defs, self.op.fields)
4811
4812
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expands the node name and validates the requested changes.

    @raise errors.OpPrereqError: if the storage type can not be modified
        at all, or if any of the changed fields is not modifiable for it

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if storage_type not in constants.MODIFIABLE_STORAGE_FIELDS:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)
    modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]

    # Any changed field outside of the modifiable ones is an error
    rejected = set(self.op.changes.keys()) - modifiable
    if rejected:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(rejected)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Declares the lock on the node holding the storage unit.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies the storage unit on the node.

    A failure of the RPC call is raised through the result's C{Raise}.

    """
    node_name = self.op.node_name
    unit_name = self.op.name
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(node_name,
                                          self.op.storage_type, st_args,
                                          unit_name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (unit_name, node_name))
4853
4854
class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  Covers both adding a node which is not yet part of the configuration
  and re-adding an already configured one (C{self.op.readd}).

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    """Normalize the node name and reject inconsistent re-add requests.

    """
    ip_family = self.cfg.GetPrimaryIPFamily()
    self.primary_ip_family = ip_family
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=ip_family)
    self.op.node_name = self.hostname.name

    if not self.op.readd:
      return

    # the remaining checks only make sense for re-adds
    if self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    op = self.op
    env = {
      "OP_TARGET": op.node_name,
      "NODE_NAME": op.node_name,
      "NODE_PIP": op.primary_ip,
      "NODE_SIP": op.secondary_ip,
      "MASTER_CAPABLE": str(op.master_capable),
      "VM_CAPABLE": str(op.vm_capable),
      }
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (pre-hook nodes, post-hook nodes); the node being
        added is only part of the second list

    """
    added_node = self.op.node_name
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([added_node]))
    post_nodes = pre_nodes + [added_node]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    op = self.op
    node = self.hostname.name
    primary_ip = op.primary_ip = self.hostname.ip

    # Without an explicit secondary IP the primary one is reused, which is
    # only possible if the primary address is not an IPv6 one
    if op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      op.secondary_ip = primary_ip

    secondary_ip = op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    # A new node must be unknown, a re-added one must be known
    node_list = cfg.GetNodeList()
    already_known = node in node_list
    if op.readd:
      if not already_known:
        raise errors.OpPrereqError("Node %s is not in the configuration" %
                                   node, errors.ECODE_NOENT)
    elif already_known:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)

    self.changed_primary_ip = False

    for (other_name, other_node) in cfg.GetMultiNodeInfo(node_list):
      if op.readd and node == other_name:
        # This is the old configuration entry of the node being re-added
        if other_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if other_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      # None of the new addresses may be in use by another node
      taken_ips = (other_node.primary_ip, other_node.secondary_ip)
      if primary_ip in taken_ips or secondary_ip in taken_ips:
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % other_node.name,
                                   errors.ECODE_NOTUNIQUE)

    if op.readd:
      old_node = cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node

    # After this loop, None is no longer a valid value for the
    # _capable op attributes: unset flags are taken from the old node
    # object on re-adds and default to True otherwise
    for attr in self._NFLAGS:
      if getattr(op, attr) is not None:
        continue
      if op.readd:
        setattr(op, attr, getattr(old_node, attr))
      else:
        setattr(op, attr, True)

    if op.readd and not op.vm_capable:
      (primary_insts, secondary_insts) = cfg.GetNodeInstances(node)
      if primary_insts or secondary_insts:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    master = cfg.GetNodeInfo(cfg.GetMasterNode())
    master_singlehomed = master.secondary_ip == master.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed and not newbie_singlehomed:
      raise errors.OpPrereqError("The master has no secondary ip but the"
                                 " new node has one",
                                 errors.ECODE_INVAL)
    if newbie_singlehomed and not master_singlehomed:
      raise errors.OpPrereqError("The master has a secondary ip but the"
                                 " new node doesn't have one",
                                 errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    # check reachability from my secondary ip to newbie's secondary ip
    if (not newbie_singlehomed and
        not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                             source=master.secondary_ip)):
      raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                 " based ping to node daemon port",
                                 errors.ECODE_ENVIRON)

    exceptions = []
    if op.readd:
      exceptions.append(node)

    if not op.master_capable:
      self.master_candidate = False
    else:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    if not op.readd:
      node_group = cfg.LookupNodeGroup(op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)
    else:
      self.new_node = old_node

    if op.ndparams:
      utils.ForceDictType(op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = False # pylint: disable=W0201
      new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for flag_name in self._NFLAGS:
      setattr(new_node, flag_name, getattr(self.op, flag_name))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # missing or empty node parameters end up as an empty dict
    new_node.ndparams = self.op.ndparams or {}

    # check connectivity
    version_result = self.rpc.call_version([node])[node]
    version_result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION != version_result.payload:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION,
                                version_result.payload))
    logging.info("Communication to node %s fine, sw version %s match",
                 node, version_result.payload)

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      hosts_result = self.rpc.call_etc_hosts_modify(master_node,
                                                    constants.ETC_HOSTS_ADD,
                                                    self.hostname.name,
                                                    self.hostname.ip)
      hosts_result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    # Let the master verify the node list entry of the new node
    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
    }

    verify_result = self.rpc.call_node_verify(node_verify_list,
                                              node_verify_param,
                                              self.cfg.GetClusterName())
    for verifier in node_verify_list:
      verify_result[verifier].Raise("Cannot communicate with node %s" %
                                    verifier)
      failures = verify_result[verifier].payload[constants.NV_NODELIST]
      if not failures:
        continue
      for failed_node in failures:
        feedback_fn("ssh/hostname verification failed"
                    " (checking from %s): %s" %
                    (verifier, failures[failed_node]))
      raise errors.OpExecError("ssh/hostname verification failed")

    if not self.op.readd:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
      return

    _RedistributeAncillaryFiles(self)
    self.context.ReaddNode(new_node)
    # make sure we redistribute the config
    self.cfg.Update(new_node, feedback_fn)
    # and make sure the new node will not have old files around
    if not new_node.master_candidate:
      demote_result = self.rpc.call_node_demote_from_mc(new_node.name)
      msg = demote_result.fail_msg
      if msg:
        self.LogWarning("Node failed to demote itself from master"
                        " candidate status: %s" % msg)
5125
5126
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    """Validate the requested modifications and decide what to lock.

    Sets C{self.might_demote}, C{self.lock_all} and C{self.lock_instances},
    which are used by the locking methods and by L{CheckPrereq}.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    """Declare the node locks and, if needed, all instance locks.

    """
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Compute the affected instances and drop unneeded instance locks.

    The affected instances are the locked ones using an internally
    mirrored disk template with the modified node among their nodes; they
    are stored in C{self.affected_instances} for L{CheckPrereq}.

    """
    if level == locking.LEVEL_NODE and self.lock_instances:
      self.affected_instances = []
      instances_keep = []

      # The affected instances are needed by the secondary IP checks in
      # CheckPrereq no matter how many node locks are requested, hence
      # they are always computed
      locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
      for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
        if (instance.disk_template in constants.DTS_INT_MIRROR and
            self.op.node_name in instance.all_nodes):
          instances_keep.append(instance_name)
          self.affected_instances.append(instance)

      # If we have locked all instances, before waiting to lock nodes, release
      # all the ones living on nodes unrelated to the current operation.
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)

        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
                set(instances_keep))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This verifies that the requested flag, power state, secondary IP and
    node parameter changes are allowed for the node, and computes the
    old and new node role (C{self.old_role}, C{self.new_role}).

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name, errors.ECODE_STATE)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name,
                                 errors.ECODE_STATE)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          # report the instance names, not the instance objects
          inst_names = ", ".join([inst.name
                                  for inst in self.affected_instances])
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % inst_names,
                                     errors.ECODE_STATE)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    @return: list of (parameter name, new value) pairs describing the
        flag, capability and secondary IP changes which were applied

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
5423
5424
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and protect the master node.

    Power-cycling the master node is refused unless the force parameter
    of the opcode is set.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    targets_master = (node_name == self.cfg.GetMasterNode())
    if targets_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    @return: the payload of the powercycle RPC call

    """
    hv_type = self.cfg.GetHypervisorType()
    rpc_result = self.rpc.call_node_powercycle(self.op.node_name, hv_type)
    rpc_result.Raise("Failed to schedule the reboot")
    return rpc_result.payload
5455
5456
class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """No locks are requested by this LU.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    @rtype: dict
    @return: dictionary with version information and cluster-level
        settings; the hypervisor-related entries (C{hvparams}, C{os_hvp})
        only contain data for the enabled hypervisors

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors

    # Filter just for enabled hypervisors
    os_hvp = {}
    for (os_name, hv_dict) in cluster.os_hvp.items():
      os_hvp[os_name] = dict([(hv_name, hv_params)
                              for (hv_name, hv_params) in hv_dict.items()
                              if hv_name in enabled_hvs])

    hvparams = {}
    for hv_name in enabled_hvs:
      hvparams[hv_name] = cluster.hvparams[hv_name]

    # Convert ip_family to ip_version
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION
    else:
      primary_ip_version = constants.IP4_VERSION

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": enabled_hvs[0],
      "enabled_hypervisors": enabled_hvs,
      "hvparams": hvparams,
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }
5524
5525
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    """Check the requested output fields against the known ones.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """No locks are requested by this LU.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Collect the values of the requested configuration fields.

    @rtype: list
    @return: one value per entry of C{self.op.output_fields}, in the
        same order
    @raise errors.ParameterError: for a field name which is not handled

    """
    # Maps each supported field name to a callable computing its value
    getters = {
      "cluster_name": lambda: self.cfg.GetClusterName(),
      "master_node": lambda: self.cfg.GetMasterNode(),
      "drain_flag": lambda: os.path.exists(constants.JOB_QUEUE_DRAIN_FILE),
      "watcher_pause":
        lambda: utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE),
      "volume_group_name": lambda: self.cfg.GetVGName(),
      }

    values = []
    for field in self.op.output_fields:
      getter = getters.get(field)
      if getter is None:
        raise errors.ParameterError(field)
      values.append(getter())
    return values
5563
5564
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed in L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Lock the instance's nodes when the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its primary
    node is online.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the device information computed by L{_AssembleInstanceDisks}
    @raise errors.OpExecError: if the disks could not be assembled

    """
    (disks_ok, disks_info) = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if disks_ok:
      return disks_info

    raise errors.OpExecError("Cannot activate block devices")
5602
5603
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  The devices are assembled in two passes: first on every node of each
  disk's node tree with the primary flag unset, then once more on the
  primary node only, with the primary flag set.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, failures during the first pass are
      only logged and do not turn the returned status into a failure
  @type ignore_size: boolean
  @param ignore_size: if true, a copy of each disk with its size unset
      is sent to the nodes, useful for cases when the recorded size is
      wrong
  @rtype: tuple
  @return: C{(disks_ok, device_info)}, where C{disks_ok} is False if any
      non-ignored assembly failed and C{device_info} is a list of
      C{(primary_node, iv_name, dev_path)} tuples, one per disk;
      C{dev_path} is None when the primary assembly did not succeed

  """
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  def _AssembleOn(node, node_disk, as_primary, idx):
    """Run one blockdev_assemble call and hand back its RPC result.

    """
    if ignore_size:
      # work on a copy so the configuration object keeps its size
      node_disk = node_disk.Copy()
      node_disk.UnsetSize()
    lu.cfg.SetDiskID(node_disk, node)
    return lu.rpc.call_blockdev_assemble(node, node_disk, iname,
                                         as_primary, idx)

  success = True

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      err = _AssembleOn(node, node_disk, False, idx).fail_msg
      if not err:
        continue
      lu.proc.LogWarning("Could not prepare block device %s on node %s"
                         " (is_primary=False, pass=1): %s",
                         inst_disk.iv_name, node, err)
      if not ignore_secondaries:
        success = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  device_info = []
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      result = _AssembleOn(node, node_disk, True, idx)
      err = result.fail_msg
      if err:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, err)
        success = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return success, device_info
5690
5691
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  The disks are assembled through L{_AssembleInstanceDisks}; if that
  reports a failure they are shut down again and an error is raised.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks should be started
  @param force: passed as C{ignore_secondaries} to the assembly; when it
      is neither None nor true, a hint about '--force' is logged on
      failure
  @raise errors.OpExecError: if the disks could not be assembled

  """
  (assembled, _) = _AssembleInstanceDisks(lu, instance,
                                          ignore_secondaries=force)
  if assembled:
    return

  _ShutdownInstanceDisks(lu, instance)
  # None means the caller has no '--force' option to suggest
  if not (force is None or force):
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
5705
5706
class LUInstanceDeactivateDisks(NoHooksLU):
  """Logical unit shutting down the block devices of a single instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request the instance lock and prepare the node-level lock list.

    The node lock list starts out empty and is flagged for recalculation
    (C{LOCKS_REPLACE}); L{DeclareLocks} fills it in.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks when the node level is being declared.

    @param level: the locking level currently being processed

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance object from the configuration and stores it in
    C{self.instance}.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    Without the force flag the shutdown goes through
    L{_SafeShutdownInstanceDisks}; with it, L{_ShutdownInstanceDisks}
    is called directly.

    """
    target = self.instance
    if not self.op.force:
      _SafeShutdownInstanceDisks(self, target)
    else:
      _ShutdownInstanceDisks(self, target)
5741
5742
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance, after a state check.

  L{_CheckInstanceDown} is run first (with the message "cannot shutdown
  disks"); only afterwards is L{_ShutdownInstanceDisks} called.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks should be shut down
  @param disks: the disks to act on, passed through unchanged (None
      selects all of the instance's disks)

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks)
5752
5753
def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list.

  @param instance: object whose C{disks} attribute lists all its disks
  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks; None selects every disk of the instance
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on
  @raise errors.ProgrammerError: if a selected disk is not one of the
      instance's disks

  """
  if disks is None:
    return instance.disks

  if set(disks).issubset(instance.disks):
    return disks

  raise errors.ProgrammerError("Can only act on disks belonging to the"
                               " target instance")
5770
5771
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  Every failure is logged as a warning. A failure on the primary node
  counts against the result unless C{ignore_primary} is true; a failure
  on any other node counts unless the RPC result is flagged as offline.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks should be shut down
  @param disks: the disks to act on (None selects all of them)
  @type ignore_primary: boolean
  @param ignore_primary: if true, errors on the primary node are ignored
  @rtype: boolean
  @return: False if at least one non-ignored shutdown failed

  """
  success = True

  for disk in _ExpandCheckDisks(instance, disks):
    for (node, top_disk) in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      err = result.fail_msg
      if not err:
        continue

      lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                    disk.iv_name, node, err)
      if node == instance.primary_node:
        counts = not ignore_primary
      else:
        counts = not result.offline
      if counts:
        success = False

  return success
5796
5797
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Verify that a node reports at least the requested free memory.

  The node is queried through the C{node_info} RPC; the check fails if
  the query fails, if the reported C{memory_free} value is not an
  integer, or if it is smaller than the requested amount.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  node_result = lu.rpc.call_node_info([node], None, hypervisor_name)[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)

  available = node_result.payload.get("memory_free", None)
  if not isinstance(available, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, available),
                               errors.ECODE_ENVIRON)

  if requested > available:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, available),
                               errors.ECODE_NORES)
5833
5834
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Check the free disk space of the nodes, one volume group at a time.

  For every entry of C{req_sizes} the per-VG check
  L{_CheckNodesFreeDiskOnVG} is run against all the given nodes; any
  exception it raises is propagated.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for (vg_name, needed_mib) in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg_name, needed_mib)
5856
5857
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Verify that all nodes report enough free space in one volume group.

  All nodes are queried with a single C{node_info} RPC; the check fails
  for the first node whose query failed, whose C{vg_free} value is not
  an integer, or whose free space is smaller than the requested amount.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  all_results = lu.rpc.call_node_info(nodenames, vg, None)

  for node in nodenames:
    node_result = all_results[node]
    node_result.Raise("Cannot get current information from node %s" % node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)

    available = node_result.payload.get("vg_free", None)
    if not isinstance(available, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, available), errors.ECODE_ENVIRON)

    if requested > available:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, available),
                                 errors.ECODE_NORES)
5893
5894
class LUInstanceStartup(LogicalUnit):
  """Logical unit starting an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Enforce the declared types on any one-off backend parameters.

    """
    # extra beparams
    beparams = self.op.beparams
    if beparams:
      utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    """Request the lock on the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    @return: the instance hook environment plus a C{FORCE} entry

    """
    hook_env = {"FORCE": self.op.force}
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the same list (master node followed by all instance nodes)
        for both elements of the tuple

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance, validates any overridden hypervisor parameters
    and records in C{self.primary_offline} whether the primary node is
    marked offline. Unless an offline primary is being ignored, the
    primary node, the instance's bridges and (if the instance is not
    already running) the node's free memory are checked as well.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    hv_override = self.op.hvparams
    if hv_override:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(hv_override, constants.HVS_PARAMETER_TYPES)
      merged_hvp = cluster.FillHV(instance)
      merged_hvp.update(hv_override)
      hv_class = hypervisor.GetHypervisor(instance.hypervisor)
      hv_class.CheckParameterSyntax(merged_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor,
                     merged_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
      return

    _CheckNodeOnline(self, instance.primary_node)

    filled_be = self.cfg.GetClusterInfo().FillBE(instance)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    running_info = self.rpc.call_instance_info(instance.primary_node,
                                               instance.name,
                                               instance.hypervisor)
    running_info.Raise("Error checking node %s" % instance.primary_node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
    if running_info.payload:
      # already running, memory is in use already
      return

    _CheckNodeFreeMemory(self, instance.primary_node,
                         "starting instance %s" % instance.name,
                         filled_be[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    Unless C{no_remember} is set, the instance is first marked as up in
    the configuration. With an offline primary node nothing else is
    done; otherwise the disks are started and the instance start RPC is
    issued, with the disks being shut down again if the start fails.

    @raise errors.OpExecError: if the instance could not be started

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
      return

    pnode = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    start_result = self.rpc.call_instance_start(pnode, instance,
                                                self.op.hvparams,
                                                self.op.beparams,
                                                self.op.startup_paused)
    err = start_result.fail_msg
    if err:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % err)
6004
6005
class LUInstanceReboot(LogicalUnit):
  """Logical unit rebooting an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Request the lock on the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    @return: the instance hook environment plus the reboot options

    """
    hook_env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the same list (master node followed by all instance nodes)
        for both elements of the tuple

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance, then checks its primary node through
    L{_CheckNodeOnline} and its bridges through
    L{_CheckInstanceBridgesExist}.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    A running instance with a soft or hard reboot type is rebooted via
    the reboot RPC. In every other case the instance is shut down (if it
    was running), its disks are restarted and it is started again.
    Finally the instance is marked as up in the configuration.

    @raise errors.OpExecError: if the instance could not be started
        again during a full reboot

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    state = self.rpc.call_instance_info(instance.primary_node,
                                        instance.name,
                                        instance.hypervisor)
    state.Raise("Error checking node %s" % instance.primary_node)
    is_running = bool(state.payload)

    pnode = instance.primary_node

    if is_running and reboot_type in (constants.INSTANCE_REBOOT_SOFT,
                                      constants.INSTANCE_REBOOT_HARD):
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, pnode)
      reboot_result = self.rpc.call_instance_reboot(pnode, instance,
                                                    reboot_type,
                                                    self.op.shutdown_timeout)
      reboot_result.Raise("Could not reboot instance")
    else:
      if not is_running:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      else:
        stop_result = self.rpc.call_instance_shutdown(pnode, instance,
                                                      self.op.shutdown_timeout)
        stop_result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)

      _StartInstanceDisks(self, instance, ignore_secondaries)
      start_result = self.rpc.call_instance_start(pnode, instance,
                                                  None, None, False)
      err = start_result.fail_msg
      if err:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % err)

    self.cfg.MarkInstanceUp(instance.name)
6098
6099
class LUInstanceShutdown(LogicalUnit):
  """Logical unit shutting down an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Request the lock on the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    @return: the instance hook environment plus a C{TIMEOUT} entry

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["TIMEOUT"] = self.op.timeout
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the same list (master node followed by all instance nodes)
        for both elements of the tuple

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance and records in C{self.primary_offline} whether
    its primary node is marked offline. Unless an offline primary is
    being ignored, the primary node is checked with L{_CheckNodeOnline}.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = self.cfg.GetNodeInfo(inst.primary_node).offline

    if not (self.primary_offline and self.op.ignore_offline_nodes):
      _CheckNodeOnline(self, inst.primary_node)
    else:
      self.proc.LogWarning("Ignoring offline primary node")

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    Unless C{no_remember} is set, the instance is first marked as down
    in the configuration. With an offline primary node nothing else is
    done; otherwise the shutdown RPC is issued (a failure is only
    logged) and the instance's disks are shut down.

    """
    instance = self.instance
    pnode = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
      return

    stop_result = self.rpc.call_instance_shutdown(pnode, instance, timeout)
    err = stop_result.fail_msg
    if err:
      self.proc.LogWarning("Could not shutdown instance: %s" % err)

    _ShutdownInstanceDisks(self, instance)
6167
6168
class LUInstanceReinstall(LogicalUnit):
  """Logical unit reinstalling the OS of an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Request the lock on the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the same list (master node followed by all instance nodes)
        for both elements of the tuple

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.
    Its primary and secondary nodes are checked with
    L{_CheckNodeOnline}, diskless instances are refused, and a new OS
    type and/or new OS parameters, if given, are verified. The updated
    OS parameters (or None) are stored in C{self.os_inst}.

    @raise errors.OpPrereqError: if the instance has no disks

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for snode in instance.secondary_nodes:
      _CheckNodeOnline(self, snode, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is None:
      target_os = instance.os
    else:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      target_os = self.op.os_type

    all_nodes = list(instance.all_nodes)

    if not self.op.osparams:
      self.os_inst = None
    else:
      new_osparams = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, all_nodes, target_os, new_osparams)
      self.os_inst = new_osparams # the new dict (without defaults)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    A changed OS type is written to the configuration first. The disks
    are then started, the OS create scripts are run on the primary node
    and the disks are shut down again, whether or not the scripts
    succeeded.

    @param feedback_fn: function used to send progress messages

    """
    instance = self.instance
    new_os = self.op.os_type

    if new_os is not None:
      feedback_fn("Changing OS to '%s'..." % new_os)
      instance.os = new_os
      # Write to configuration
      self.cfg.Update(instance, feedback_fn)

    _StartInstanceDisks(self, instance, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      add_result = self.rpc.call_instance_os_add(instance.primary_node,
                                                 instance, True,
                                                 self.op.debug_level,
                                                 osparams=self.os_inst)
      add_result.Raise("Could not install OS for instance %s on node %s" %
                       (instance.name, instance.primary_node))
    finally:
      _ShutdownInstanceDisks(self, instance)
6258
6259
class LUInstanceRecreateDisks(LogicalUnit):
  """Logical unit recreating the missing disks of an instance.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Normalise the disk index list: drop duplicates and sort it.

    """
    unique_indices = frozenset(self.op.disks)
    self.op.disks = sorted(unique_indices)

  def ExpandNames(self):
    """Request the instance lock and the initial node locks.

    If replacement nodes were given, their names are expanded and they
    are locked; otherwise the node lock list starts out empty. In both
    cases the node level is flagged for recalculation (C{LOCKS_APPEND}).

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = []
      return

    expanded = [_ExpandNodeName(self.cfg, name) for name in self.op.nodes]
    self.op.nodes = expanded
    self.needed_locks[locking.LEVEL_NODE] = list(expanded)

  def DeclareLocks(self, level):
    """Compute the node locks when the node level is being declared.

    @param level: the locking level currently being processed

    """
    if level != locking.LEVEL_NODE:
      return
    # if we replace the nodes, we only need to lock the old primary,
    # otherwise we need to lock all nodes for disk re-creation
    self._LockInstancesNodes(primary_only=bool(self.op.nodes))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the same list (master node followed by all instance nodes)
        for both elements of the tuple

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running
    (the latter is skipped when nodes are being replaced and the old
    primary is offline), that the number of replacement nodes matches,
    that the instance has disks and that the requested disk indices are
    valid. An empty disk list is expanded to all disk indices.

    @raise errors.OpPrereqError: on a node count mismatch, a diskless
        instance, an invalid disk index, or a partial recreation
        combined with a change of nodes

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    if not self.op.nodes:
      new_pnode = instance.primary_node
    else:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      new_pnode = self.op.nodes[0]
    _CheckNodeOnline(self, new_pnode)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)

    # if we replace nodes *and* the old primary is offline, we don't
    # check
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not self.op.nodes or not old_pnode.offline:
      _CheckInstanceDown(self, instance, "cannot recreate disks")

    disk_count = len(instance.disks)
    every_index = range(disk_count)

    if self.op.disks:
      for idx in self.op.disks:
        if idx >= disk_count:
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
                                     errors.ECODE_INVAL)
    else:
      self.op.disks = every_index

    if self.op.disks != every_index and self.op.nodes:
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    Disks whose index was not requested are skipped. When replacement
    nodes were given, new logical ids (with freshly allocated minors)
    are computed for DRBD8 disks, the primary node is changed and the
    instance is written back to the configuration before the disks are
    created.

    @param feedback_fn: function used to send progress messages

    """
    instance = self.instance
    new_nodes = self.op.nodes

    skipped = []
    pending_ids = [] # keeps track of needed logical_id changes

    for idx, disk in enumerate(instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        skipped.append(idx)
        continue
      # update secondaries for disks, if needed
      if new_nodes and disk.dev_type == constants.LD_DRBD8:
        # need to update the nodes and minors
        assert len(new_nodes) == 2
        assert len(disk.logical_id) == 6 # otherwise disk internals
                                         # have changed
        (_, _, old_port, _, _, old_secret) = disk.logical_id
        minors = self.cfg.AllocateDRBDMinor(new_nodes, instance.name)
        new_id = (new_nodes[0], new_nodes[1], old_port,
                  minors[0], minors[1], old_secret)
        assert len(disk.logical_id) == len(new_id)
        pending_ids.append((idx, new_id))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for (idx, new_id) in pending_ids:
      instance.disks[idx].logical_id = new_id

    if new_nodes:
      # change primary node and persist the updated instance
      instance.primary_node = new_nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")
      self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=skipped)
6393
6394
class LUInstanceRename(LogicalUnit):
  """Logical unit renaming an instance.

  L{CheckPrereq} validates the request (instance stopped, new name
  acceptable and unused); L{Exec} updates the configuration and the
  instance lock, moves the file storage directory for file-based
  instances and runs the OS rename script.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Validate the opcode arguments.

    @raise errors.OpPrereqError: if an IP check was requested without
        also requesting a name check

    """
    if not self.op.ip_check:
      return
    if self.op.name_check:
      return
    # TODO: make the ip check more flexible and not depend on the name check
    raise errors.OpPrereqError("IP address check requires a name check",
                               errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The standard per-instance environment is extended with the
    requested new name.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["INSTANCE_NEW_NAME"] = self.op.new_name
    return hook_env

  def BuildHooksNodes(self):
    """Build the list of nodes running the hooks.

    The same list (master node followed by all nodes of the instance)
    is used for both hook phases.

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must exist, its primary node must be online and the
    instance must be down. If a name check was requested, the new name
    is resolved and C{self.op.new_name} is replaced by the resolved
    name; with an additional IP check the resolved address must not
    answer on the node daemon port. Finally the new name must not
    belong to another instance.

    @raise errors.OpPrereqError: if any of the checks fails

    """
    expanded_name = _ExpandInstanceName(self.cfg, self.op.instance_name)
    self.op.instance_name = expanded_name
    instance = self.cfg.GetInstanceInfo(expanded_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    target_name = self.op.new_name
    if self.op.name_check:
      resolved = netutils.GetHostname(name=target_name)
      if resolved.name != target_name:
        self.LogInfo("Resolved given name '%s' to '%s'", target_name,
                     resolved.name)
      looks_same = utils.MatchNameComponent(self.op.new_name, [resolved.name])
      if not looks_same:
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                    (resolved.name, self.op.new_name),
                                    errors.ECODE_INVAL)
      # From here on only the resolved name is used
      target_name = resolved.name
      self.op.new_name = target_name
      if self.op.ip_check:
        if netutils.TcpPing(resolved.ip, constants.DEFAULT_NODED_PORT):
          raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                     (resolved.ip, target_name),
                                     errors.ECODE_NOTUNIQUE)

    known_instances = self.cfg.GetInstanceList()
    # Renaming an instance to its current name is not a conflict
    if target_name != instance.name and target_name in known_instances:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 target_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    @return: the name of the instance as re-read from the configuration
        after the rename

    """
    previous = self.instance
    old_name = previous.name
    new_name = self.op.new_name

    # The storage directory only has to move for file-based instances
    # whose name actually changes; remember the old location before the
    # configuration is modified
    old_storage_dir = None
    if previous.disk_template in constants.DTS_FILEBASED:
      if new_name != old_name:
        old_storage_dir = os.path.dirname(previous.disks[0].logical_id[1])

    self.cfg.RenameInstance(old_name, new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, new_name)

    # re-read the instance from the configuration after rename
    renamed = self.cfg.GetInstanceInfo(new_name)

    if old_storage_dir is not None:
      new_storage_dir = os.path.dirname(renamed.disks[0].logical_id[1])
      dir_result = self.rpc.call_file_storage_dir_rename(renamed.primary_node,
                                                         old_storage_dir,
                                                         new_storage_dir)
      dir_result.Raise("Could not rename on node %s directory '%s' to '%s'"
                       " (but the instance has been renamed in Ganeti)" %
                       (renamed.primary_node, old_storage_dir,
                        new_storage_dir))

    # The disks are started only for the duration of the OS rename script
    _StartInstanceDisks(self, renamed, None)
    try:
      script_result = self.rpc.call_instance_run_rename(renamed.primary_node,
                                                        renamed, old_name,
                                                        self.op.debug_level)
      failure = script_result.fail_msg
      if failure:
        # A failing script is only reported, the rename itself stays in place
        warning = ("Could not run OS rename script for instance %s on node %s"
                   " (but the instance has been renamed in Ganeti): %s" %
                   (renamed.name, renamed.primary_node, failure))
        self.proc.LogWarning(warning)
    finally:
      _ShutdownInstanceDisks(self, renamed)

    return renamed.name
6512
6513
class LUInstanceRemove(LogicalUnit):
  """Logical unit removing an instance.

  The instance is shut down on its primary node and then removed via
  L{_RemoveInstance}.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed in L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node locks of the instance at the node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The standard per-instance environment is extended with the shutdown
    timeout taken from the opcode.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return hook_env

  def BuildHooksNodes(self):
    """Build the lists of nodes running the hooks.

    @return: a tuple of two lists; the first contains only the master
        node, the second all nodes of the instance followed by the
        master node

    """
    master = self.cfg.GetMasterNode()
    pre_nodes = [master]
    post_nodes = list(self.instance.all_nodes)
    post_nodes.append(master)
    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    inst_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(inst_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    @raise errors.OpExecError: if the shutdown fails and failures are
        not being ignored

    """
    inst = self.instance
    pnode = inst.primary_node
    logging.info("Shutting down instance %s on node %s", inst.name, pnode)

    shutdown_result = self.rpc.call_instance_shutdown(pnode, inst,
                                                      self.op.shutdown_timeout)
    failure = shutdown_result.fail_msg
    if failure:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, inst.primary_node, failure))
      # Failures are tolerated on request, only report them
      feedback_fn("Warning: can't shutdown instance: %s" % failure)

    _RemoveInstance(self, feedback_fn, inst, self.op.ignore_failures)
6579
6580
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Remove an instance's disks, configuration entry and lock.

  @param lu: the logical unit on whose behalf we execute
  @param feedback_fn: function used to report warnings to the caller
  @param instance: the instance object to remove
  @param ignore_failures: if true, a failed disk removal is only
      reported through C{feedback_fn} instead of aborting
  @raise errors.OpExecError: if the disks cannot be removed and
      failures are not ignored

  """
  inst_name = instance.name
  logging.info("Removing block devices for instance %s", inst_name)

  disks_removed = _RemoveDisks(lu, instance)
  if not disks_removed:
    if ignore_failures:
      feedback_fn("Warning: can't remove instance's disks")
    else:
      raise errors.OpExecError("Can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", inst_name)

  lu.cfg.RemoveInstance(instance.name)

  # Nobody else may have scheduled an instance lock for removal already
  pending_removal = lu.remove_locks.get(locking.LEVEL_INSTANCE)
  assert not pending_removal, "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6601
6602
class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  All the work is delegated to an L{_InstanceQuery} object created in
  L{CheckArguments}.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.iq = _InstanceQuery(name_filter, self.op.output_fields,
                             self.op.use_locking)

  def ExpandNames(self):
    """Let the query object expand names on behalf of this LU.

    """
    query = self.iq
    query.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the query object declare the locks for the given level.

    """
    query = self.iq
    query.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and return its old-style result.

    """
    query = self.iq
    return query.OldStyleQuery(self)
6622
6623
class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  The actual work is done by a L{TLMigrateInstance} tasklet created
  with C{failover=True}; this class handles locking and hooks.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    The C{iallocator} and C{target_node} opcode parameters are copied
    onto the LU, defaulting to C{None} when the opcode lacks them.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    """Expand names, declare the instance lock and create the tasklet.

    Node locks are left empty here and filled in by L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    """Declare the node locks.

    For externally mirrored disk templates either all nodes (no target
    node given) or the primary and the target node are locked; for all
    other templates the nodes of the instance are locked.

    """
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        # The lock list is final, no recalculation wanted
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The nodes running the hooks are defined by L{BuildHooksNodes}.

    C{NEW_PRIMARY} is the node the instance will really end up on. The
    opcode's C{target_node} is not sufficient for that: it is C{None}
    when the target is chosen by an iallocator and usually also for
    internally mirrored instances, whose only possible target is their
    secondary node.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    internally_mirrored = instance.disk_template in constants.DTS_INT_MIRROR

    if internally_mirrored:
      # The tasklet only accepts the secondary node as target
      target_node = instance.secondary_nodes[0]
    else:
      # The tasklet's CheckPrereq stores the effective target (given
      # explicitly or computed by the iallocator); fall back to the
      # opcode value should the attribute be missing
      target_node = getattr(self._migrater, "target_node",
                            self.op.target_node)

    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if internally_mirrored:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    # Per-instance values take precedence over the ones set above
    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: a tuple of two lists; the first contains the master node
        and the secondary nodes of the instance, the second additionally
        the primary node

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])
6703
6704
class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  The actual work is done by a L{TLMigrateInstance} tasklet created
  with C{failover=False}; this class handles locking and hooks.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names, declare the instance lock and create the tasklet.

    Node locks are left empty here and filled in by L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=self.op.cleanup,
                                       failover=False,
                                       fallback=self.op.allow_failover)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    """Declare the node locks.

    For externally mirrored disk templates either all nodes (no target
    node given) or the primary and the target node are locked; for all
    other templates the nodes of the instance are locked.

    """
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        # The lock list is final, no recalculation wanted
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The nodes running the hooks are defined by L{BuildHooksNodes}.

    C{NEW_PRIMARY} (and C{OLD_SECONDARY} for internally mirrored
    instances) is the node the instance will really end up on. The
    opcode's C{target_node} is not sufficient for that: it is C{None}
    when the target is chosen by an iallocator and usually also for
    internally mirrored instances, whose only possible target is their
    secondary node.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    internally_mirrored = instance.disk_template in constants.DTS_INT_MIRROR

    if internally_mirrored:
      # The tasklet only accepts the secondary node as target
      target_node = instance.secondary_nodes[0]
    else:
      # The tasklet's CheckPrereq stores the effective target (given
      # explicitly or computed by the iallocator); fall back to the
      # opcode value should the attribute be missing
      target_node = getattr(self._migrater, "target_node",
                            self.op.target_node)

    env = _BuildInstanceHookEnvByObject(self, instance)
    # Migration-specific values take precedence over the per-instance ones
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      })

    if internally_mirrored:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: a tuple of two lists; the first contains the master node
        and the secondary nodes of the instance, the second additionally
        the primary node

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])
6776
6777
class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  The instance is shut down on its current primary node, new disks are
  created on the target node, the data is copied over and the instance
  is (if marked up) started on the target node.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and declare the instance and target node locks.

    The lock on the instance's primary node is appended in
    L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Add the instance's primary node to the node locks.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks run on the master node, the instance's primary node and
    the target node (see L{BuildHooksNodes}).

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    # Per-instance values take precedence over the ones set above
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the same list (master, primary and target node) for both
        hook phases

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that the target
    node differs from the current primary node, that all disks are plain
    logical volumes or files, and that the target node is usable
    (online, not drained, VM-capable, bridges present and, if the
    instance is marked up, enough free memory).

    @raise errors.OpPrereqError: if the instance already lives on the
        target node or has a disk of an unsupported type

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    # Only LV- and file-based disks are copied by this LU
    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    # NOTE(review): the messages below speak of "failing over" and of the
    # "secondary node" although this LU moves the instance to an arbitrary
    # target node; the wording looks copied from the failover code - confirm
    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @raise errors.OpExecError: if the instance cannot be shut down (and
        consistency is not ignored), if copying a disk fails, or if the
        instance cannot be started on the target node

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      # A failed shutdown is fatal unless the user asked to ignore it
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # Runs whether or not the disk removal succeeded; the bare raise
        # re-raises the exception currently being handled
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        # stop at the first failure, the remaining disks are not tried
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # Raising from the finally clause means the copy error is what
        # gets reported, even if the cleanup itself raised
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    # The data is on the target node now: record the new primary node
    # before the disks on the old one are removed
    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance,
                                            None, None, False)
      msg = result.fail_msg
      if msg:
        # Do not leave the disks active when the instance is not running
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
6961
6962
class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  No instance is migrated by this LU itself: it submits one migration
  job per primary instance of the node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    """Nothing to check at the argument level.

    """
    pass

  def ExpandNames(self):
    """Expand the node name and declare a shared lock on the node.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [node_name],
      }

  def BuildHooksEnv(self):
    """Build the hooks environment.

    @return: a dictionary containing only the name of the node

    """
    hook_env = {}
    hook_env["NODE_NAME"] = self.op.node_name
    return hook_env

  def BuildHooksNodes(self):
    """Build the list of nodes running the hooks.

    @return: the same single-element list (the master node) for both
        hook phases

    """
    master_only = [self.cfg.GetMasterNode()]
    return (master_only, master_only)

  def CheckPrereq(self):
    """No prerequisites are checked by this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Submit one migration job per primary instance of the node.

    @return: a L{ResultWithJobs} wrapping the single-opcode jobs

    """
    # Prepare jobs for migration instances
    jobs = []
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      migrate_op = opcodes.OpInstanceMigrate(instance_name=inst.name,
                                             mode=self.op.mode,
                                             live=self.op.live,
                                             iallocator=self.op.iallocator,
                                             target_node=self.op.target_node)
      jobs.append([migrate_op])

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
    assert owned_nodes == frozenset([self.op.node_name])

    return ResultWithJobs(jobs)
7022
7023
7024 class TLMigrateInstance(Tasklet):
7025   """Tasklet class for instance migration.
7026
7027   @type live: boolean
7028   @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
7030   @type cleanup: boolean
  @ivar cleanup: Whether we cleanup from a failed migration
7032   @type iallocator: string
7033   @ivar iallocator: The iallocator used to determine target_node
7034   @type target_node: string
7035   @ivar target_node: If given, the target_node to reallocate the instance to
7036   @type failover: boolean
7037   @ivar failover: Whether operation results in failover or migration
7038   @type fallback: boolean
7039   @ivar fallback: Whether fallback to failover is allowed if migration not
7040                   possible
7041   @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
7043                             and target node
7044   @type shutdown_timeout: int
7045   @ivar shutdown_timeout: In case of failover timeout of the shutdown
7046
7047   """
7048   def __init__(self, lu, instance_name, cleanup=False,
7049                failover=False, fallback=False,
7050                ignore_consistency=False,
7051                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7052     """Initializes this class.
7053
7054     """
7055     Tasklet.__init__(self, lu)
7056
7057     # Parameters
7058     self.instance_name = instance_name
7059     self.cleanup = cleanup
7060     self.live = False # will be overridden later
7061     self.failover = failover
7062     self.fallback = fallback
7063     self.ignore_consistency = ignore_consistency
7064     self.shutdown_timeout = shutdown_timeout
7065
7066   def CheckPrereq(self):
7067     """Check prerequisites.
7068
7069     This checks that the instance is in the cluster.
7070
7071     """
7072     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7073     instance = self.cfg.GetInstanceInfo(instance_name)
7074     assert instance is not None
7075     self.instance = instance
7076
7077     if (not self.cleanup and not instance.admin_up and not self.failover and
7078         self.fallback):
7079       self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7080                       " to failover")
7081       self.failover = True
7082
7083     if instance.disk_template not in constants.DTS_MIRRORED:
7084       if self.failover:
7085         text = "failovers"
7086       else:
7087         text = "migrations"
7088       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7089                                  " %s" % (instance.disk_template, text),
7090                                  errors.ECODE_STATE)
7091
7092     if instance.disk_template in constants.DTS_EXT_MIRROR:
7093       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7094
7095       if self.lu.op.iallocator:
7096         self._RunAllocator()
7097       else:
7098         # We set set self.target_node as it is required by
7099         # BuildHooksEnv
7100         self.target_node = self.lu.op.target_node
7101
7102       # self.target_node is already populated, either directly or by the
7103       # iallocator run
7104       target_node = self.target_node
7105       if self.target_node == instance.primary_node:
7106         raise errors.OpPrereqError("Cannot migrate instance %s"
7107                                    " to its primary (%s)" %
7108                                    (instance.name, instance.primary_node))
7109
7110       if len(self.lu.tasklets) == 1:
7111         # It is safe to release locks only when we're the only tasklet
7112         # in the LU
7113         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7114                       keep=[instance.primary_node, self.target_node])
7115
7116     else:
7117       secondary_nodes = instance.secondary_nodes
7118       if not secondary_nodes:
7119         raise errors.ConfigurationError("No secondary node but using"
7120                                         " %s disk template" %
7121                                         instance.disk_template)
7122       target_node = secondary_nodes[0]
7123       if self.lu.op.iallocator or (self.lu.op.target_node and
7124                                    self.lu.op.target_node != target_node):
7125         if self.failover:
7126           text = "failed over"
7127         else:
7128           text = "migrated"
7129         raise errors.OpPrereqError("Instances with disk template %s cannot"
7130                                    " be %s to arbitrary nodes"
7131                                    " (neither an iallocator nor a target"
7132                                    " node can be passed)" %
7133                                    (instance.disk_template, text),
7134                                    errors.ECODE_INVAL)
7135
7136     i_be = self.cfg.GetClusterInfo().FillBE(instance)
7137
7138     # check memory requirements on the secondary node
7139     if not self.cleanup and (not self.failover or instance.admin_up):
7140       _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7141                            instance.name, i_be[constants.BE_MEMORY],
7142                            instance.hypervisor)
7143     else:
7144       self.lu.LogInfo("Not checking memory on the secondary node as"
7145                       " instance will not be started")
7146
7147     # check bridge existance
7148     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7149
7150     if not self.cleanup:
7151       _CheckNodeNotDrained(self.lu, target_node)
7152       if not self.failover:
7153         result = self.rpc.call_instance_migratable(instance.primary_node,
7154                                                    instance)
7155         if result.fail_msg and self.fallback:
7156           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7157                           " failover")
7158           self.failover = True
7159         else:
7160           result.Raise("Can't migrate, please use failover",
7161                        prereq=True, ecode=errors.ECODE_STATE)
7162
7163     assert not (self.failover and self.cleanup)
7164
7165     if not self.failover:
7166       if self.lu.op.live is not None and self.lu.op.mode is not None:
7167         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7168                                    " parameters are accepted",
7169                                    errors.ECODE_INVAL)
7170       if self.lu.op.live is not None:
7171         if self.lu.op.live:
7172           self.lu.op.mode = constants.HT_MIGRATION_LIVE
7173         else:
7174           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7175         # reset the 'live' parameter to None so that repeated
7176         # invocations of CheckPrereq do not raise an exception
7177         self.lu.op.live = None
7178       elif self.lu.op.mode is None:
7179         # read the default value from the hypervisor
7180         i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7181                                                 skip_globals=False)
7182         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7183
7184       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7185     else:
7186       # Failover is never live
7187       self.live = False
7188
7189   def _RunAllocator(self):
7190     """Run the allocator based on input opcode.
7191
7192     """
7193     ial = IAllocator(self.cfg, self.rpc,
7194                      mode=constants.IALLOCATOR_MODE_RELOC,
7195                      name=self.instance_name,
7196                      # TODO See why hail breaks with a single node below
7197                      relocate_from=[self.instance.primary_node,
7198                                     self.instance.primary_node],
7199                      )
7200
7201     ial.Run(self.lu.op.iallocator)
7202
7203     if not ial.success:
7204       raise errors.OpPrereqError("Can't compute nodes using"
7205                                  " iallocator '%s': %s" %
7206                                  (self.lu.op.iallocator, ial.info),
7207                                  errors.ECODE_NORES)
7208     if len(ial.result) != ial.required_nodes:
7209       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7210                                  " of nodes (%s), required %s" %
7211                                  (self.lu.op.iallocator, len(ial.result),
7212                                   ial.required_nodes), errors.ECODE_FAULT)
7213     self.target_node = ial.result[0]
7214     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7215                  self.instance_name, self.lu.op.iallocator,
7216                  utils.CommaJoin(ial.result))
7217
7218   def _WaitUntilSync(self):
7219     """Poll with custom rpc for disk sync.
7220
7221     This uses our own step-based rpc call.
7222
7223     """
7224     self.feedback_fn("* wait until resync is done")
7225     all_done = False
7226     while not all_done:
7227       all_done = True
7228       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7229                                             self.nodes_ip,
7230                                             self.instance.disks)
7231       min_percent = 100
7232       for node, nres in result.items():
7233         nres.Raise("Cannot resync disks on node %s" % node)
7234         node_done, node_percent = nres.payload
7235         all_done = all_done and node_done
7236         if node_percent is not None:
7237           min_percent = min(min_percent, node_percent)
7238       if not all_done:
7239         if min_percent < 100:
7240           self.feedback_fn("   - progress: %.1f%%" % min_percent)
7241         time.sleep(2)
7242
7243   def _EnsureSecondary(self, node):
7244     """Demote a node to secondary.
7245
7246     """
7247     self.feedback_fn("* switching node %s to secondary mode" % node)
7248
7249     for dev in self.instance.disks:
7250       self.cfg.SetDiskID(dev, node)
7251
7252     result = self.rpc.call_blockdev_close(node, self.instance.name,
7253                                           self.instance.disks)
7254     result.Raise("Cannot change disk to secondary on node %s" % node)
7255
7256   def _GoStandalone(self):
7257     """Disconnect from the network.
7258
7259     """
7260     self.feedback_fn("* changing into standalone mode")
7261     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7262                                                self.instance.disks)
7263     for node, nres in result.items():
7264       nres.Raise("Cannot disconnect disks node %s" % node)
7265
7266   def _GoReconnect(self, multimaster):
7267     """Reconnect to the network.
7268
7269     """
7270     if multimaster:
7271       msg = "dual-master"
7272     else:
7273       msg = "single-master"
7274     self.feedback_fn("* changing disks into %s mode" % msg)
7275     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7276                                            self.instance.disks,
7277                                            self.instance.name, multimaster)
7278     for node, nres in result.items():
7279       nres.Raise("Cannot change disks config on node %s" % node)
7280
7281   def _ExecCleanup(self):
7282     """Try to cleanup after a failed migration.
7283
7284     The cleanup is done by:
7285       - check that the instance is running only on one node
7286         (and update the config if needed)
7287       - change disks on its secondary node to secondary
7288       - wait until disks are fully synchronized
7289       - disconnect from the network
7290       - change disks into single-master mode
7291       - wait again until disks are fully synchronized
7292
7293     """
7294     instance = self.instance
7295     target_node = self.target_node
7296     source_node = self.source_node
7297
7298     # check running on only one node
7299     self.feedback_fn("* checking where the instance actually runs"
7300                      " (if this hangs, the hypervisor might be in"
7301                      " a bad state)")
7302     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7303     for node, result in ins_l.items():
7304       result.Raise("Can't contact node %s" % node)
7305
7306     runningon_source = instance.name in ins_l[source_node].payload
7307     runningon_target = instance.name in ins_l[target_node].payload
7308
7309     if runningon_source and runningon_target:
7310       raise errors.OpExecError("Instance seems to be running on two nodes,"
7311                                " or the hypervisor is confused; you will have"
7312                                " to ensure manually that it runs only on one"
7313                                " and restart this operation")
7314
7315     if not (runningon_source or runningon_target):
7316       raise errors.OpExecError("Instance does not seem to be running at all;"
7317                                " in this case it's safer to repair by"
7318                                " running 'gnt-instance stop' to ensure disk"
7319                                " shutdown, and then restarting it")
7320
7321     if runningon_target:
7322       # the migration has actually succeeded, we need to update the config
7323       self.feedback_fn("* instance running on secondary node (%s),"
7324                        " updating config" % target_node)
7325       instance.primary_node = target_node
7326       self.cfg.Update(instance, self.feedback_fn)
7327       demoted_node = source_node
7328     else:
7329       self.feedback_fn("* instance confirmed to be running on its"
7330                        " primary node (%s)" % source_node)
7331       demoted_node = target_node
7332
7333     if instance.disk_template in constants.DTS_INT_MIRROR:
7334       self._EnsureSecondary(demoted_node)
7335       try:
7336         self._WaitUntilSync()
7337       except errors.OpExecError:
7338         # we ignore here errors, since if the device is standalone, it
7339         # won't be able to sync
7340         pass
7341       self._GoStandalone()
7342       self._GoReconnect(False)
7343       self._WaitUntilSync()
7344
7345     self.feedback_fn("* done")
7346
7347   def _RevertDiskStatus(self):
7348     """Try to revert the disk status after a failed migration.
7349
7350     """
7351     target_node = self.target_node
7352     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7353       return
7354
7355     try:
7356       self._EnsureSecondary(target_node)
7357       self._GoStandalone()
7358       self._GoReconnect(False)
7359       self._WaitUntilSync()
7360     except errors.OpExecError, err:
7361       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7362                          " please try to recover the instance manually;"
7363                          " error '%s'" % str(err))
7364
7365   def _AbortMigration(self):
7366     """Call the hypervisor code to abort a started migration.
7367
7368     """
7369     instance = self.instance
7370     target_node = self.target_node
7371     migration_info = self.migration_info
7372
7373     abort_result = self.rpc.call_finalize_migration(target_node,
7374                                                     instance,
7375                                                     migration_info,
7376                                                     False)
7377     abort_msg = abort_result.fail_msg
7378     if abort_msg:
7379       logging.error("Aborting migration failed on target node %s: %s",
7380                     target_node, abort_msg)
7381       # Don't raise an exception here, as we stil have to try to revert the
7382       # disk status, even if this step failed.
7383
  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - check the consistency of all disks on the target node
      - fetch the migration information from the source node
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - prepare the target node to accept the instance
      - migrate the instance
      - update the configuration and finalize the migration on the target
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    The steps changing the disk mode are skipped for disk templates in
    C{constants.DTS_EXT_MIRROR}.

    @raise errors.OpExecError: if a disk is not consistent on the target
        node, or if fetching the migration information, preparing the
        target node, migrating or finalizing fails

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    # kept on self as well, since _AbortMigration reads it from there
    self.migration_info = migration_info = result.payload

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      # roll back: abort on the target node first, then put the disks back
      # into single-master mode, and only then report the failure
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      # same rollback as for a failed pre-migration step
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    # from here on the target node is recorded as the primary node; note
    # that the configuration is updated before the finalization below
    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      # no rollback is attempted here, the failure is only reported
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # demote the old primary and go back to single-master mode
      self._EnsureSecondary(source_node)
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")
7478
  def _ExecFailover(self):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    The steps are:
      - if the instance is marked as up, check the disk consistency on
        the target node (degraded disks are tolerated if the primary
        node is offline or C{self.ignore_consistency} is set)
      - shut down the instance on the source node (a failure is only
        a warning under the same two conditions)
      - shut down the instance's disks
      - make the target node the primary node in the configuration
      - if the instance is marked as up, assemble its disks and start
        it on the target node

    @raise errors.OpExecError: if one of the steps fails and the
        failure is not tolerated as described above

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    # source_node is the node name, primary_node the node object
    source_node = instance.primary_node
    target_node = self.target_node

    if instance.admin_up:
      self.feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
          if primary_node.offline:
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                             " target node %s" %
                             (primary_node.name, dev.iv_name, target_node))
          elif not self.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover" % dev.iv_name)
    else:
      self.feedback_fn("* not checking disk consistency as instance is not"
                       " running")

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      # a failed shutdown is tolerated in the same cases in which
      # degraded disks are tolerated above
      if self.ignore_consistency or primary_node.offline:
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                           " proceeding anyway; please make sure node"
                           " %s is down; error details: %s",
                           instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks")

    # the configuration is switched before the instance is started again,
    # so a start failure below leaves the target node as primary
    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.feedback_fn("* activating the instance's disks on target node %s" %
                       target_node)
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        # do not leave partially assembled disks behind
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node %s" %
                       target_node)
      result = self.rpc.call_instance_start(target_node, instance, None, None,
                                            False)
      msg = result.fail_msg
      if msg:
        # the disks were assembled just above, shut them down again
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
7556
7557   def Exec(self, feedback_fn):
7558     """Perform the migration.
7559
7560     """
7561     self.feedback_fn = feedback_fn
7562     self.source_node = self.instance.primary_node
7563
7564     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7565     if self.instance.disk_template in constants.DTS_INT_MIRROR:
7566       self.target_node = self.instance.secondary_nodes[0]
7567       # Otherwise self.target_node has been populated either
7568       # directly, or through an iallocator.
7569
7570     self.all_nodes = [self.source_node, self.target_node]
7571     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7572                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
7573
7574     if self.failover:
7575       feedback_fn("Failover instance %s" % self.instance.name)
7576       self._ExecFailover()
7577     else:
7578       feedback_fn("Migrating instance %s" % self.instance.name)
7579
7580       if self.cleanup:
7581         return self._ExecCleanup()
7582       else:
7583         return self._ExecMigration()
7584
7585
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a block device and its children on a given node.

  The children are always visited first. The device itself is only
  created if creation is forced, either by the caller or because the
  device reports that it has to be created on secondaries; in the
  latter case creation is forced for all its children, too.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      is turned on for every device whose CreateOnSecondary() method
      returns a true value
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter is passed on unchanged to
      L{_CreateSingleBlockDev}, see there for its meaning

  """
  if device.CreateOnSecondary():
    must_create = True
  else:
    must_create = force_create

  for child in device.children or []:
    _CreateBlockDev(lu, node, instance, child, must_create,
                    info, force_open)

  if must_create:
    _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7626
7627
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create exactly one block device on a given node.

  Children of the device are not handled here, they have to exist
  already when this function is called.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  create_result = lu.rpc.call_blockdev_create(node, device, device.size,
                                              instance.name, force_open,
                                              info)
  failure_text = ("Can't create block device %s on"
                  " node %s for instance %s" % (device, node, instance.name))
  create_result.Raise(failure_text)

  # a physical ID which is already set is left alone
  if device.physical_id is None:
    device.physical_id = create_result.payload
7656
7657
def _GenerateUniqueNames(lu, exts):
  """Generate one unique name per given extension.

  For every extension a new unique ID is requested from the
  configuration and the extension is appended to it.

  @param lu: the lu on whose behalf we execute
  @param exts: the extensions (suffixes) to build names for
  @rtype: list
  @return: the generated names, in the order of C{exts}

  """
  return ["%s%s" % (lu.cfg.GenerateUniqueID(lu.proc.GetECId()), ext)
          for ext in exts]
7669
7670
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Build a drbd8 disk object together with its two LV children.

  A port and a shared secret are allocated from the configuration for
  the new device.

  @param lu: the lu on whose behalf we execute
  @param primary: the primary node, first element of the logical ID
  @param secondary: the secondary node, second element of the logical ID
  @param size: size of the drbd device and of its data volume
  @param vgnames: volume groups of the data and of the meta volume
  @param names: LV names of the data and of the meta volume
  @param iv_name: the C{iv_name} of the drbd device
  @param p_minor: minor stored in the logical ID after the port
  @param s_minor: minor stored in the logical ID after C{p_minor}
  @rtype: L{objects.Disk}
  @return: the drbd8 device

  """
  assert len(vgnames) == len(names) == 2
  drbd_port = lu.cfg.AllocatePort()
  secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  # first child is the data volume, second one the fixed-size meta volume
  children = [
    objects.Disk(dev_type=constants.LD_LV, size=size,
                 logical_id=(vgnames[0], names[0])),
    objects.Disk(dev_type=constants.LD_LV, size=128,
                 logical_id=(vgnames[1], names[1])),
    ]

  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=(primary, secondary, drbd_port,
                                  p_minor, s_minor, secret),
                      children=children,
                      iv_name=iv_name)
7690
7691
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
  """Build the disk objects of an instance for a given disk template.

  @param lu: the lu on whose behalf we execute
  @param template_name: the disk template to generate disks for
  @param instance_name: instance name, used for the DRBD minor allocation
  @param primary_node: the primary node of the instance
  @param secondary_nodes: list of secondary nodes; it must have exactly
      one element for the drbd8 template and must be empty for all
      other templates which check it
  @param disk_info: list of dicts describing the requested disks
  @param file_storage_dir: directory of the disk files (file-based
      templates only)
  @param file_driver: driver stored in the logical ID of file disks
  @param base_index: index of the first generated disk
  @param feedback_fn: function used to send feedback back to the caller
  @rtype: list
  @return: list of L{objects.Disk}, one per entry in C{disk_info}
  @raise errors.ProgrammerError: for an unknown template or a wrong
      number of secondary nodes

  """
  #TODO: compute space requirements

  default_vg = lu.cfg.GetVGName()
  num_disks = len(disk_info)
  disks = []

  if template_name == constants.DT_DISKLESS:
    # nothing to generate
    pass

  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    suffixes = [".disk%d" % (base_index + i) for i in range(num_disks)]
    lv_names = _GenerateUniqueNames(lu, suffixes)
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get(constants.IDISK_VG, default_vg)
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, lv_names[idx]))
      disks.append(objects.Disk(dev_type=constants.LD_LV,
                                size=disk[constants.IDISK_SIZE],
                                logical_id=(vg, lv_names[idx]),
                                iv_name="disk/%d" % disk_index,
                                mode=disk[constants.IDISK_MODE]))

  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # two minors per disk: one on the primary, one on the secondary node
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    suffixes = [".disk%d" % (base_index + i) for i in range(num_disks)]
    # two LV names per disk: data volume first, meta volume second
    lv_names = []
    for prefix in _GenerateUniqueNames(lu, suffixes):
      lv_names.extend([prefix + "_data", prefix + "_meta"])

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      first = idx * 2
      data_vg = disk.get(constants.IDISK_VG, default_vg)
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
      drbd_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      lv_names[first:first + 2],
                                      "disk/%d" % disk_index,
                                      minors[first], minors[first + 1])
      drbd_dev.mode = disk[constants.IDISK_MODE]
      disks.append(drbd_dev)

  elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    # both templates generate the same disks, only the check differs
    if template_name == constants.DT_FILE:
      opcodes.RequireFileStorage()
    else:
      opcodes.RequireSharedFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disks.append(objects.Disk(dev_type=constants.LD_FILE,
                                size=disk[constants.IDISK_SIZE],
                                iv_name="disk/%d" % disk_index,
                                logical_id=(file_driver,
                                            "%s/disk%d" % (file_storage_dir,
                                                           disk_index)),
                                mode=disk[constants.IDISK_MODE]))

  elif template_name == constants.DT_BLOCK:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disks.append(objects.Disk(dev_type=constants.LD_BLOCKDEV,
                                size=disk[constants.IDISK_SIZE],
                                logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
                                            disk[constants.IDISK_ADOPT]),
                                iv_name="disk/%d" % disk_index,
                                mode=disk[constants.IDISK_MODE]))

  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)

  return disks
7796
7797
def _GetInstanceInfoText(instance):
  """Build the text which is attached to an instance's disks as metadata.

  @param instance: the instance owning the disks; only its C{name}
      attribute is used
  @rtype: string
  @return: the instance name, prefixed with C{originstname+}

  """
  owner_name = instance.name
  return "originstname+%s" % owner_name
7803
7804
def _CalcEta(time_taken, written, total_size):
  """Estimate the time still needed to write the remaining data.

  The estimate assumes that the remaining data is written at the
  average speed observed so far.

  @param time_taken: the time taken so far
  @param written: amount written so far; must not be zero
  @param total_size: the total size of data to be written
  @return: the remaining time, in the unit of C{time_taken}

  """
  time_per_unit = time_taken / float(written)
  remaining = total_size - written
  return remaining * time_per_unit
7816
7817
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  The synchronization of the disks is paused on the instance's primary
  node before the wipe and resumed afterwards, also if the wipe itself
  fails. Each disk is wiped in chunks of
  C{constants.MIN_WIPE_CHUNK_PERCENT} percent of its size, limited to
  C{constants.MAX_WIPE_CHUNK} and never smaller than one size unit.

  This function does not return a value; a failed pause or wipe request
  is reported through the C{Raise} method of the RPC result.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
  # The payload must not be read from a failed call; check it the same
  # way the wipe call below is checked.
  result.Raise("Failed to pause disk synchronization on node %s" % node)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disks %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur; for small disks the truncation yields zero, which would
      # never advance the offset below, hence the lower bound of one
      wipe_chunk_size = max(1, int(wipe_chunk_size))

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      # zero makes sure the progress is reported after the first chunk
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        # report the progress at most once per minute
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    # Nothing may be raised from here, as this would hide an error of the
    # wipe itself; a failed call is therefore only reported as a warning
    # and its payload is not touched.
    if result.fail_msg:
      lu.LogWarning("Failed to resume disk synchronization on node %s: %s",
                    node, result.fail_msg)
    else:
      for idx, success in enumerate(result.payload):
        if not success:
          lu.LogWarning("Resume sync of disk %d failed, please have a"
                        " look at the status and troubleshoot the issue", idx)
          logging.warn("resume-sync of instance %s for disks %d failed",
                       instance.name, idx)
7885
7886
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks of an instance.

  For file-based disk templates the storage directory is created
  first. Every disk is then created on all nodes of the instance, or
  only on C{target_node} if one is given.

  This function has no return value.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of disk indices which are not created
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is not None:
    pnode = target_node
    all_nodes = [pnode]
  else:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  if instance.disk_template in constants.DTS_FILEBASED:
    # the directory is derived from the path of the first disk
    storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    dir_result = lu.rpc.call_file_storage_dir_create(pnode, storage_dir)
    dir_result.Raise("Failed to create directory '%s' on"
                     " node %s" % (storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      # creation and opening are only forced on the primary/target node
      on_pnode = (node == pnode)
      _CreateBlockDev(lu, node, instance, device, on_pnode, info, on_pnode)
7930
7931
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  For every DRBD disk the port stored in its logical ID is handed back
  to the configuration. For the file disk template the storage
  directory is removed as well.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      tcp_port = device.logical_id[2]
      lu.cfg.AddTcpUdpPort(tcp_port)

  # NOTE(review): _CreateDisks creates the directory for all templates in
  # constants.DTS_FILEBASED, while it is only removed for DT_FILE here;
  # confirm that this difference is intended
  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # report the node the request was sent to, which is not the
      # primary node if target_node was given
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
7984
7985
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements per volume group.

  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list of dicts
  @param disks: the disk definitions; each one must provide the
      C{constants.IDISK_VG} and C{constants.IDISK_SIZE} keys
  @rtype: dict
  @return: a mapping of volume group name to the total space required in
      it (in the unit used by C{constants.IDISK_SIZE}); empty for
      templates which do not use volume groups
  @raise errors.ProgrammerError: if the disk template is not known

  """
  def _compute(disks, payload):
    """Sum up the disk sizes per volume group.

    C{payload} is a per-disk overhead added on top of each disk's size.

    """
    vgs = {}
    for disk in disks:
      vg_name = disk[constants.IDISK_VG]
      # accumulate under the disk's own VG name, so that several disks in
      # the same volume group add up instead of overwriting each other
      vgs[vg_name] = vgs.get(vg_name, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
8016
8017
def _ComputeDiskSize(disk_template, disks):
  """Compute the total disk space required for a set of disks.

  @param disk_template: the disk template of the instance
  @param disks: the disk definitions, each providing the
      C{constants.IDISK_SIZE} key
  @return: the summed-up size for the plain and DRBD templates (DRBD adds
      128 per disk for its metadata), C{None} for the diskless and file
      templates, and 0 for the shared-file and block templates
  @raise errors.ProgrammerError: if the disk template is not known

  """
  plain_total = sum(d[constants.IDISK_SIZE] for d in disks)
  # 128 MB are added for drbd metadata for each disk
  drbd_total = sum(d[constants.IDISK_SIZE] + 128 for d in disks)

  # Required free disk space as a function of disk and swap space
  size_by_template = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: plain_total,
    constants.DT_DRBD8: drbd_total,
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  if disk_template in size_by_template:
    return size_by_template[disk_template]

  raise errors.ProgrammerError("Disk template '%s' size requirement"
                               " is unknown" % disk_template)
8038
8039
8040 def _FilterVmNodes(lu, nodenames):
8041   """Filters out non-vm_capable nodes from a list.
8042
8043   @type lu: L{LogicalUnit}
8044   @param lu: the logical unit for which we check
8045   @type nodenames: list
8046   @param nodenames: the list of nodes on which we should check
8047   @rtype: list
8048   @return: the list of vm-capable nodes
8049
8050   """
8051   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8052   return [name for name in nodenames if name not in vm_nodes]
8053
8054
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Validate hypervisor parameters on a set of nodes.

  Shared by instance creation and instance modification. Nodes which are
  not vm_capable are left out of the check, and results coming from
  offline nodes are ignored.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_capable = _FilterVmNodes(lu, nodenames)
  results = lu.rpc.call_hypervisor_validate_params(vm_capable, hvname,
                                                   hvparams)
  for node_name in vm_capable:
    node_result = results[node_name]
    if not node_result.offline:
      node_result.Raise("Hypervisor parameter validation failed on node %s" %
                        node_name)
8081
8082
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """Validate OS parameters on a set of nodes.

  Nodes which are not vm_capable are left out of the check.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS whose parameters are validated
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_capable = _FilterVmNodes(lu, nodenames)
  checks = [constants.OS_VALIDATE_PARAMETERS]
  per_node = lu.rpc.call_os_validate(required, vm_capable, osname, checks,
                                     osparams)
  for node_name, node_result in per_node.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    node_result.Raise("OS Parameters validation failed on node %s" %
                      node_name)
    if node_result.payload:
      continue
    lu.LogInfo("OS %s not found on node %s, validation skipped",
               osname, node_name)
8111
8112
8113 class LUInstanceCreate(LogicalUnit):
8114   """Create an instance.
8115
8116   """
8117   HPATH = "instance-add"
8118   HTYPE = constants.HTYPE_INSTANCE
8119   REQ_BGL = False
8120
  def CheckArguments(self):
    """Check arguments.

    Validates and normalizes the opcode fields, in this order: the
    no_install/start combination, the instance name, the NIC and disk
    parameter types, the disk adoption strategy, the file driver and file
    storage settings, the node/iallocator selection and finally the
    checks specific to the creation mode.

    Besides updating some C{self.op} fields in place, this sets
    C{self.adopt_disks}, C{self.check_ip}, C{self._cds} and, depending on
    the options and mode, C{self.hostname1}, C{self.source_x509_ca_pem},
    C{self.source_x509_ca} and C{self.source_instance_name}.

    @raise errors.OpPrereqError: if any of the arguments is invalid

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    # mixing adopted and newly created disks is rejected
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      # adoption is limited to some disk templates and cannot be combined
      # with an iallocator or with the import mode
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      # some templates can only be used with adopted disks
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      # the resolved name replaces the one given in the opcode
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        # a secondary node given for a non-mirrored template is dropped
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    # kept on the LU; used below for the remote import handshake and for
    # loading the signed source CA
    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      # a plain creation needs an allowed guest OS and a disk template
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      # a non-None error code means the certificate failed verification
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      # store the resolved name of the source instance
      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)
8275
8276   def ExpandNames(self):
8277     """ExpandNames for CreateInstance.
8278
8279     Figure out the right locks for instance creation.
8280
8281     """
8282     self.needed_locks = {}
8283
8284     instance_name = self.op.instance_name
8285     # this is just a preventive check, but someone might still add this
8286     # instance in the meantime, and creation will fail at lock-add time
8287     if instance_name in self.cfg.GetInstanceList():
8288       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8289                                  instance_name, errors.ECODE_EXISTS)
8290
8291     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8292
8293     if self.op.iallocator:
8294       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8295     else:
8296       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8297       nodelist = [self.op.pnode]
8298       if self.op.snode is not None:
8299         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8300         nodelist.append(self.op.snode)
8301       self.needed_locks[locking.LEVEL_NODE] = nodelist
8302
8303     # in case of import lock the source node too
8304     if self.op.mode == constants.INSTANCE_IMPORT:
8305       src_node = self.op.src_node
8306       src_path = self.op.src_path
8307
8308       if src_path is None:
8309         self.op.src_path = src_path = self.op.instance_name
8310
8311       if src_node is None:
8312         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8313         self.op.src_node = None
8314         if os.path.isabs(src_path):
8315           raise errors.OpPrereqError("Importing an instance from a path"
8316                                      " requires a source node option",
8317                                      errors.ECODE_INVAL)
8318       else:
8319         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8320         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8321           self.needed_locks[locking.LEVEL_NODE].append(src_node)
8322         if not os.path.isabs(src_path):
8323           self.op.src_path = src_path = \
8324             utils.PathJoin(constants.EXPORT_DIR, src_path)
8325
8326   def _RunAllocator(self):
8327     """Run the allocator based on input opcode.
8328
8329     """
8330     nics = [n.ToDict() for n in self.nics]
8331     ial = IAllocator(self.cfg, self.rpc,
8332                      mode=constants.IALLOCATOR_MODE_ALLOC,
8333                      name=self.op.instance_name,
8334                      disk_template=self.op.disk_template,
8335                      tags=self.op.tags,
8336                      os=self.op.os_type,
8337                      vcpus=self.be_full[constants.BE_VCPUS],
8338                      memory=self.be_full[constants.BE_MEMORY],
8339                      disks=self.disks,
8340                      nics=nics,
8341                      hypervisor=self.op.hypervisor,
8342                      )
8343
8344     ial.Run(self.op.iallocator)
8345
8346     if not ial.success:
8347       raise errors.OpPrereqError("Can't compute nodes using"
8348                                  " iallocator '%s': %s" %
8349                                  (self.op.iallocator, ial.info),
8350                                  errors.ECODE_NORES)
8351     if len(ial.result) != ial.required_nodes:
8352       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8353                                  " of nodes (%s), required %s" %
8354                                  (self.op.iallocator, len(ial.result),
8355                                   ial.required_nodes), errors.ECODE_FAULT)
8356     self.op.pnode = ial.result[0]
8357     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8358                  self.op.instance_name, self.op.iallocator,
8359                  utils.CommaJoin(ial.result))
8360     if ial.required_nodes == 2:
8361       self.op.snode = ial.result[1]
8362
8363   def BuildHooksEnv(self):
8364     """Build hooks env.
8365
8366     This runs on master, primary and secondary nodes of the instance.
8367
8368     """
8369     env = {
8370       "ADD_MODE": self.op.mode,
8371       }
8372     if self.op.mode == constants.INSTANCE_IMPORT:
8373       env["SRC_NODE"] = self.op.src_node
8374       env["SRC_PATH"] = self.op.src_path
8375       env["SRC_IMAGES"] = self.src_images
8376
8377     env.update(_BuildInstanceHookEnv(
8378       name=self.op.instance_name,
8379       primary_node=self.op.pnode,
8380       secondary_nodes=self.secondaries,
8381       status=self.op.start,
8382       os_type=self.op.os_type,
8383       memory=self.be_full[constants.BE_MEMORY],
8384       vcpus=self.be_full[constants.BE_VCPUS],
8385       nics=_NICListToTuple(self, self.nics),
8386       disk_template=self.op.disk_template,
8387       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8388              for d in self.disks],
8389       bep=self.be_full,
8390       hvp=self.hv_full,
8391       hypervisor_name=self.op.hypervisor,
8392       tags=self.op.tags,
8393     ))
8394
8395     return env
8396
8397   def BuildHooksNodes(self):
8398     """Build hooks nodes.
8399
8400     """
8401     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8402     return nl, nl
8403
8404   def _ReadExportInfo(self):
8405     """Reads the export information from disk.
8406
8407     It will override the opcode source node and path with the actual
8408     information, if these two were not specified before.
8409
8410     @return: the export information
8411
8412     """
8413     assert self.op.mode == constants.INSTANCE_IMPORT
8414
8415     src_node = self.op.src_node
8416     src_path = self.op.src_path
8417
8418     if src_node is None:
8419       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8420       exp_list = self.rpc.call_export_list(locked_nodes)
8421       found = False
8422       for node in exp_list:
8423         if exp_list[node].fail_msg:
8424           continue
8425         if src_path in exp_list[node].payload:
8426           found = True
8427           self.op.src_node = src_node = node
8428           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8429                                                        src_path)
8430           break
8431       if not found:
8432         raise errors.OpPrereqError("No export found for relative path %s" %
8433                                     src_path, errors.ECODE_INVAL)
8434
8435     _CheckNodeOnline(self, src_node)
8436     result = self.rpc.call_export_info(src_node, src_path)
8437     result.Raise("No export or invalid export found in dir %s" % src_path)
8438
8439     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8440     if not export_info.has_section(constants.INISECT_EXP):
8441       raise errors.ProgrammerError("Corrupted export config",
8442                                    errors.ECODE_ENVIRON)
8443
8444     ei_version = export_info.get(constants.INISECT_EXP, "version")
8445     if (int(ei_version) != constants.EXPORT_VERSION):
8446       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8447                                  (ei_version, constants.EXPORT_VERSION),
8448                                  errors.ECODE_ENVIRON)
8449     return export_info
8450
8451   def _ReadExportParams(self, einfo):
8452     """Use export parameters as defaults.
8453
8454     In case the opcode doesn't specify (as in override) some instance
8455     parameters, then try to use them from the export information, if
8456     that declares them.
8457
8458     """
8459     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8460
8461     if self.op.disk_template is None:
8462       if einfo.has_option(constants.INISECT_INS, "disk_template"):
8463         self.op.disk_template = einfo.get(constants.INISECT_INS,
8464                                           "disk_template")
8465       else:
8466         raise errors.OpPrereqError("No disk template specified and the export"
8467                                    " is missing the disk_template information",
8468                                    errors.ECODE_INVAL)
8469
8470     if not self.op.disks:
8471       if einfo.has_option(constants.INISECT_INS, "disk_count"):
8472         disks = []
8473         # TODO: import the disk iv_name too
8474         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8475           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8476           disks.append({constants.IDISK_SIZE: disk_sz})
8477         self.op.disks = disks
8478       else:
8479         raise errors.OpPrereqError("No disk info specified and the export"
8480                                    " is missing the disk information",
8481                                    errors.ECODE_INVAL)
8482
8483     if (not self.op.nics and
8484         einfo.has_option(constants.INISECT_INS, "nic_count")):
8485       nics = []
8486       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8487         ndict = {}
8488         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8489           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8490           ndict[name] = v
8491         nics.append(ndict)
8492       self.op.nics = nics
8493
8494     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8495       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8496
8497     if (self.op.hypervisor is None and
8498         einfo.has_option(constants.INISECT_INS, "hypervisor")):
8499       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8500
8501     if einfo.has_section(constants.INISECT_HYP):
8502       # use the export parameters but do not override the ones
8503       # specified by the user
8504       for name, value in einfo.items(constants.INISECT_HYP):
8505         if name not in self.op.hvparams:
8506           self.op.hvparams[name] = value
8507
8508     if einfo.has_section(constants.INISECT_BEP):
8509       # use the parameters, without overriding
8510       for name, value in einfo.items(constants.INISECT_BEP):
8511         if name not in self.op.beparams:
8512           self.op.beparams[name] = value
8513     else:
8514       # try to read the parameters old style, from the main section
8515       for name in constants.BES_PARAMETERS:
8516         if (name not in self.op.beparams and
8517             einfo.has_option(constants.INISECT_INS, name)):
8518           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8519
8520     if einfo.has_section(constants.INISECT_OSP):
8521       # use the parameters, without overriding
8522       for name, value in einfo.items(constants.INISECT_OSP):
8523         if name not in self.op.osparams:
8524           self.op.osparams[name] = value
8525
8526   def _RevertToDefaults(self, cluster):
8527     """Revert the instance parameters to the default values.
8528
8529     """
8530     # hvparams
8531     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8532     for name in self.op.hvparams.keys():
8533       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8534         del self.op.hvparams[name]
8535     # beparams
8536     be_defs = cluster.SimpleFillBE({})
8537     for name in self.op.beparams.keys():
8538       if name in be_defs and be_defs[name] == self.op.beparams[name]:
8539         del self.op.beparams[name]
8540     # nic params
8541     nic_defs = cluster.SimpleFillNIC({})
8542     for nic in self.op.nics:
8543       for name in constants.NICS_PARAMETERS:
8544         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8545           del nic[name]
8546     # osparams
8547     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8548     for name in self.op.osparams.keys():
8549       if name in os_defs and os_defs[name] == self.op.osparams[name]:
8550         del self.op.osparams[name]
8551
8552   def _CalculateFileStorageDir(self):
8553     """Calculate final instance file storage dir.
8554
8555     """
8556     # file storage dir calculation/check
8557     self.instance_file_storage_dir = None
8558     if self.op.disk_template in constants.DTS_FILEBASED:
8559       # build the full file storage dir path
8560       joinargs = []
8561
8562       if self.op.disk_template == constants.DT_SHARED_FILE:
8563         get_fsd_fn = self.cfg.GetSharedFileStorageDir
8564       else:
8565         get_fsd_fn = self.cfg.GetFileStorageDir
8566
8567       cfg_storagedir = get_fsd_fn()
8568       if not cfg_storagedir:
8569         raise errors.OpPrereqError("Cluster file storage dir not defined")
8570       joinargs.append(cfg_storagedir)
8571
8572       if self.op.file_storage_dir is not None:
8573         joinargs.append(self.op.file_storage_dir)
8574
8575       joinargs.append(self.op.instance_name)
8576
8577       # pylint: disable=W0142
8578       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8579
8580   def CheckPrereq(self):
8581     """Check prerequisites.
8582
8583     """
8584     self._CalculateFileStorageDir()
8585
8586     if self.op.mode == constants.INSTANCE_IMPORT:
8587       export_info = self._ReadExportInfo()
8588       self._ReadExportParams(export_info)
8589
8590     if (not self.cfg.GetVGName() and
8591         self.op.disk_template not in constants.DTS_NOT_LVM):
8592       raise errors.OpPrereqError("Cluster does not support lvm-based"
8593                                  " instances", errors.ECODE_STATE)
8594
8595     if self.op.hypervisor is None:
8596       self.op.hypervisor = self.cfg.GetHypervisorType()
8597
8598     cluster = self.cfg.GetClusterInfo()
8599     enabled_hvs = cluster.enabled_hypervisors
8600     if self.op.hypervisor not in enabled_hvs:
8601       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8602                                  " cluster (%s)" % (self.op.hypervisor,
8603                                   ",".join(enabled_hvs)),
8604                                  errors.ECODE_STATE)
8605
8606     # Check tag validity
8607     for tag in self.op.tags:
8608       objects.TaggableObject.ValidateTag(tag)
8609
8610     # check hypervisor parameter syntax (locally)
8611     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8612     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8613                                       self.op.hvparams)
8614     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8615     hv_type.CheckParameterSyntax(filled_hvp)
8616     self.hv_full = filled_hvp
8617     # check that we don't specify global parameters on an instance
8618     _CheckGlobalHvParams(self.op.hvparams)
8619
8620     # fill and remember the beparams dict
8621     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8622     self.be_full = cluster.SimpleFillBE(self.op.beparams)
8623
8624     # build os parameters
8625     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8626
8627     # now that hvp/bep are in final format, let's reset to defaults,
8628     # if told to do so
8629     if self.op.identify_defaults:
8630       self._RevertToDefaults(cluster)
8631
8632     # NIC buildup
8633     self.nics = []
8634     for idx, nic in enumerate(self.op.nics):
8635       nic_mode_req = nic.get(constants.INIC_MODE, None)
8636       nic_mode = nic_mode_req
8637       if nic_mode is None:
8638         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8639
8640       # in routed mode, for the first nic, the default ip is 'auto'
8641       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8642         default_ip_mode = constants.VALUE_AUTO
8643       else:
8644         default_ip_mode = constants.VALUE_NONE
8645
8646       # ip validity checks
8647       ip = nic.get(constants.INIC_IP, default_ip_mode)
8648       if ip is None or ip.lower() == constants.VALUE_NONE:
8649         nic_ip = None
8650       elif ip.lower() == constants.VALUE_AUTO:
8651         if not self.op.name_check:
8652           raise errors.OpPrereqError("IP address set to auto but name checks"
8653                                      " have been skipped",
8654                                      errors.ECODE_INVAL)
8655         nic_ip = self.hostname1.ip
8656       else:
8657         if not netutils.IPAddress.IsValid(ip):
8658           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8659                                      errors.ECODE_INVAL)
8660         nic_ip = ip
8661
8662       # TODO: check the ip address for uniqueness
8663       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8664         raise errors.OpPrereqError("Routed nic mode requires an ip address",
8665                                    errors.ECODE_INVAL)
8666
8667       # MAC address verification
8668       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8669       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8670         mac = utils.NormalizeAndValidateMac(mac)
8671
8672         try:
8673           self.cfg.ReserveMAC(mac, self.proc.GetECId())
8674         except errors.ReservationError:
8675           raise errors.OpPrereqError("MAC address %s already in use"
8676                                      " in cluster" % mac,
8677                                      errors.ECODE_NOTUNIQUE)
8678
8679       #  Build nic parameters
8680       link = nic.get(constants.INIC_LINK, None)
8681       nicparams = {}
8682       if nic_mode_req:
8683         nicparams[constants.NIC_MODE] = nic_mode_req
8684       if link:
8685         nicparams[constants.NIC_LINK] = link
8686
8687       check_params = cluster.SimpleFillNIC(nicparams)
8688       objects.NIC.CheckParameterSyntax(check_params)
8689       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8690
8691     # disk checks/pre-build
8692     default_vg = self.cfg.GetVGName()
8693     self.disks = []
8694     for disk in self.op.disks:
8695       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8696       if mode not in constants.DISK_ACCESS_SET:
8697         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8698                                    mode, errors.ECODE_INVAL)
8699       size = disk.get(constants.IDISK_SIZE, None)
8700       if size is None:
8701         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8702       try:
8703         size = int(size)
8704       except (TypeError, ValueError):
8705         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8706                                    errors.ECODE_INVAL)
8707
8708       data_vg = disk.get(constants.IDISK_VG, default_vg)
8709       new_disk = {
8710         constants.IDISK_SIZE: size,
8711         constants.IDISK_MODE: mode,
8712         constants.IDISK_VG: data_vg,
8713         constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8714         }
8715       if constants.IDISK_ADOPT in disk:
8716         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8717       self.disks.append(new_disk)
8718
8719     if self.op.mode == constants.INSTANCE_IMPORT:
8720
8721       # Check that the new instance doesn't have less disks than the export
8722       instance_disks = len(self.disks)
8723       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8724       if instance_disks < export_disks:
8725         raise errors.OpPrereqError("Not enough disks to import."
8726                                    " (instance: %d, export: %d)" %
8727                                    (instance_disks, export_disks),
8728                                    errors.ECODE_INVAL)
8729
8730       disk_images = []
8731       for idx in range(export_disks):
8732         option = "disk%d_dump" % idx
8733         if export_info.has_option(constants.INISECT_INS, option):
8734           # FIXME: are the old os-es, disk sizes, etc. useful?
8735           export_name = export_info.get(constants.INISECT_INS, option)
8736           image = utils.PathJoin(self.op.src_path, export_name)
8737           disk_images.append(image)
8738         else:
8739           disk_images.append(False)
8740
8741       self.src_images = disk_images
8742
8743       old_name = export_info.get(constants.INISECT_INS, "name")
8744       try:
8745         exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8746       except (TypeError, ValueError), err:
8747         raise errors.OpPrereqError("Invalid export file, nic_count is not"
8748                                    " an integer: %s" % str(err),
8749                                    errors.ECODE_STATE)
8750       if self.op.instance_name == old_name:
8751         for idx, nic in enumerate(self.nics):
8752           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8753             nic_mac_ini = "nic%d_mac" % idx
8754             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8755
8756     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8757
8758     # ip ping checks (we use the same ip that was resolved in ExpandNames)
8759     if self.op.ip_check:
8760       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8761         raise errors.OpPrereqError("IP %s of instance %s already in use" %
8762                                    (self.check_ip, self.op.instance_name),
8763                                    errors.ECODE_NOTUNIQUE)
8764
8765     #### mac address generation
8766     # By generating here the mac address both the allocator and the hooks get
8767     # the real final mac address rather than the 'auto' or 'generate' value.
8768     # There is a race condition between the generation and the instance object
8769     # creation, which means that we know the mac is valid now, but we're not
8770     # sure it will be when we actually add the instance. If things go bad
8771     # adding the instance will abort because of a duplicate mac, and the
8772     # creation job will fail.
8773     for nic in self.nics:
8774       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8775         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8776
8777     #### allocator run
8778
8779     if self.op.iallocator is not None:
8780       self._RunAllocator()
8781
8782     #### node related checks
8783
8784     # check primary node
8785     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8786     assert self.pnode is not None, \
8787       "Cannot retrieve locked node %s" % self.op.pnode
8788     if pnode.offline:
8789       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8790                                  pnode.name, errors.ECODE_STATE)
8791     if pnode.drained:
8792       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8793                                  pnode.name, errors.ECODE_STATE)
8794     if not pnode.vm_capable:
8795       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8796                                  " '%s'" % pnode.name, errors.ECODE_STATE)
8797
8798     self.secondaries = []
8799
8800     # mirror node verification
8801     if self.op.disk_template in constants.DTS_INT_MIRROR:
8802       if self.op.snode == pnode.name:
8803         raise errors.OpPrereqError("The secondary node cannot be the"
8804                                    " primary node", errors.ECODE_INVAL)
8805       _CheckNodeOnline(self, self.op.snode)
8806       _CheckNodeNotDrained(self, self.op.snode)
8807       _CheckNodeVmCapable(self, self.op.snode)
8808       self.secondaries.append(self.op.snode)
8809
8810     nodenames = [pnode.name] + self.secondaries
8811
8812     if not self.adopt_disks:
8813       # Check lv size requirements, if not adopting
8814       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8815       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8816
8817     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8818       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8819                                 disk[constants.IDISK_ADOPT])
8820                      for disk in self.disks])
8821       if len(all_lvs) != len(self.disks):
8822         raise errors.OpPrereqError("Duplicate volume names given for adoption",
8823                                    errors.ECODE_INVAL)
8824       for lv_name in all_lvs:
8825         try:
8826           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8827           # to ReserveLV uses the same syntax
8828           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8829         except errors.ReservationError:
8830           raise errors.OpPrereqError("LV named %s used by another instance" %
8831                                      lv_name, errors.ECODE_NOTUNIQUE)
8832
8833       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8834       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8835
8836       node_lvs = self.rpc.call_lv_list([pnode.name],
8837                                        vg_names.payload.keys())[pnode.name]
8838       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8839       node_lvs = node_lvs.payload
8840
8841       delta = all_lvs.difference(node_lvs.keys())
8842       if delta:
8843         raise errors.OpPrereqError("Missing logical volume(s): %s" %
8844                                    utils.CommaJoin(delta),
8845                                    errors.ECODE_INVAL)
8846       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8847       if online_lvs:
8848         raise errors.OpPrereqError("Online logical volumes found, cannot"
8849                                    " adopt: %s" % utils.CommaJoin(online_lvs),
8850                                    errors.ECODE_STATE)
8851       # update the size of disk based on what is found
8852       for dsk in self.disks:
8853         dsk[constants.IDISK_SIZE] = \
8854           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8855                                         dsk[constants.IDISK_ADOPT])][0]))
8856
8857     elif self.op.disk_template == constants.DT_BLOCK:
8858       # Normalize and de-duplicate device paths
8859       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8860                        for disk in self.disks])
8861       if len(all_disks) != len(self.disks):
8862         raise errors.OpPrereqError("Duplicate disk names given for adoption",
8863                                    errors.ECODE_INVAL)
8864       baddisks = [d for d in all_disks
8865                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8866       if baddisks:
8867         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8868                                    " cannot be adopted" %
8869                                    (", ".join(baddisks),
8870                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
8871                                    errors.ECODE_INVAL)
8872
8873       node_disks = self.rpc.call_bdev_sizes([pnode.name],
8874                                             list(all_disks))[pnode.name]
8875       node_disks.Raise("Cannot get block device information from node %s" %
8876                        pnode.name)
8877       node_disks = node_disks.payload
8878       delta = all_disks.difference(node_disks.keys())
8879       if delta:
8880         raise errors.OpPrereqError("Missing block device(s): %s" %
8881                                    utils.CommaJoin(delta),
8882                                    errors.ECODE_INVAL)
8883       for dsk in self.disks:
8884         dsk[constants.IDISK_SIZE] = \
8885           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8886
8887     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8888
8889     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8890     # check OS parameters (remotely)
8891     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8892
8893     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8894
8895     # memory check on primary node
8896     if self.op.start:
8897       _CheckNodeFreeMemory(self, self.pnode.name,
8898                            "creating instance %s" % self.op.instance_name,
8899                            self.be_full[constants.BE_MEMORY],
8900                            self.op.hypervisor)
8901
8902     self.dry_run_result = list(nodenames)
8903
8904   def Exec(self, feedback_fn):
8905     """Create and add the instance to the cluster.
8906
8907     """
8908     instance = self.op.instance_name
8909     pnode_name = self.pnode.name
8910
8911     ht_kind = self.op.hypervisor
8912     if ht_kind in constants.HTS_REQ_PORT:
8913       network_port = self.cfg.AllocatePort()
8914     else:
8915       network_port = None
8916
8917     disks = _GenerateDiskTemplate(self,
8918                                   self.op.disk_template,
8919                                   instance, pnode_name,
8920                                   self.secondaries,
8921                                   self.disks,
8922                                   self.instance_file_storage_dir,
8923                                   self.op.file_driver,
8924                                   0,
8925                                   feedback_fn)
8926
8927     iobj = objects.Instance(name=instance, os=self.op.os_type,
8928                             primary_node=pnode_name,
8929                             nics=self.nics, disks=disks,
8930                             disk_template=self.op.disk_template,
8931                             admin_up=False,
8932                             network_port=network_port,
8933                             beparams=self.op.beparams,
8934                             hvparams=self.op.hvparams,
8935                             hypervisor=self.op.hypervisor,
8936                             osparams=self.op.osparams,
8937                             )
8938
8939     if self.op.tags:
8940       for tag in self.op.tags:
8941         iobj.AddTag(tag)
8942
8943     if self.adopt_disks:
8944       if self.op.disk_template == constants.DT_PLAIN:
8945         # rename LVs to the newly-generated names; we need to construct
8946         # 'fake' LV disks with the old data, plus the new unique_id
8947         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8948         rename_to = []
8949         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8950           rename_to.append(t_dsk.logical_id)
8951           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8952           self.cfg.SetDiskID(t_dsk, pnode_name)
8953         result = self.rpc.call_blockdev_rename(pnode_name,
8954                                                zip(tmp_disks, rename_to))
8955         result.Raise("Failed to rename adoped LVs")
8956     else:
8957       feedback_fn("* creating instance disks...")
8958       try:
8959         _CreateDisks(self, iobj)
8960       except errors.OpExecError:
8961         self.LogWarning("Device creation failed, reverting...")
8962         try:
8963           _RemoveDisks(self, iobj)
8964         finally:
8965           self.cfg.ReleaseDRBDMinors(instance)
8966           raise
8967
8968     feedback_fn("adding instance %s to cluster config" % instance)
8969
8970     self.cfg.AddInstance(iobj, self.proc.GetECId())
8971
8972     # Declare that we don't want to remove the instance lock anymore, as we've
8973     # added the instance to the config
8974     del self.remove_locks[locking.LEVEL_INSTANCE]
8975
8976     if self.op.mode == constants.INSTANCE_IMPORT:
8977       # Release unused nodes
8978       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8979     else:
8980       # Release all nodes
8981       _ReleaseLocks(self, locking.LEVEL_NODE)
8982
8983     disk_abort = False
8984     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8985       feedback_fn("* wiping instance disks...")
8986       try:
8987         _WipeDisks(self, iobj)
8988       except errors.OpExecError, err:
8989         logging.exception("Wiping disks failed")
8990         self.LogWarning("Wiping instance disks failed (%s)", err)
8991         disk_abort = True
8992
8993     if disk_abort:
8994       # Something is already wrong with the disks, don't do anything else
8995       pass
8996     elif self.op.wait_for_sync:
8997       disk_abort = not _WaitForSync(self, iobj)
8998     elif iobj.disk_template in constants.DTS_INT_MIRROR:
8999       # make sure the disks are not degraded (still sync-ing is ok)
9000       feedback_fn("* checking mirrors status")
9001       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9002     else:
9003       disk_abort = False
9004
9005     if disk_abort:
9006       _RemoveDisks(self, iobj)
9007       self.cfg.RemoveInstance(iobj.name)
9008       # Make sure the instance lock gets removed
9009       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9010       raise errors.OpExecError("There are some degraded disks for"
9011                                " this instance")
9012
9013     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9014       if self.op.mode == constants.INSTANCE_CREATE:
9015         if not self.op.no_install:
9016           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9017                         not self.op.wait_for_sync)
9018           if pause_sync:
9019             feedback_fn("* pausing disk sync to install instance OS")
9020             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9021                                                               iobj.disks, True)
9022             for idx, success in enumerate(result.payload):
9023               if not success:
9024                 logging.warn("pause-sync of instance %s for disk %d failed",
9025                              instance, idx)
9026
9027           feedback_fn("* running the instance OS create scripts...")
9028           # FIXME: pass debug option from opcode to backend
9029           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
9030                                                  self.op.debug_level)
9031           if pause_sync:
9032             feedback_fn("* resuming disk sync")
9033             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9034                                                               iobj.disks, False)
9035             for idx, success in enumerate(result.payload):
9036               if not success:
9037                 logging.warn("resume-sync of instance %s for disk %d failed",
9038                              instance, idx)
9039
9040           result.Raise("Could not add os for instance %s"
9041                        " on node %s" % (instance, pnode_name))
9042
9043       elif self.op.mode == constants.INSTANCE_IMPORT:
9044         feedback_fn("* running the instance OS import scripts...")
9045
9046         transfers = []
9047
9048         for idx, image in enumerate(self.src_images):
9049           if not image:
9050             continue
9051
9052           # FIXME: pass debug option from opcode to backend
9053           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9054                                              constants.IEIO_FILE, (image, ),
9055                                              constants.IEIO_SCRIPT,
9056                                              (iobj.disks[idx], idx),
9057                                              None)
9058           transfers.append(dt)
9059
9060         import_result = \
9061           masterd.instance.TransferInstanceData(self, feedback_fn,
9062                                                 self.op.src_node, pnode_name,
9063                                                 self.pnode.secondary_ip,
9064                                                 iobj, transfers)
9065         if not compat.all(import_result):
9066           self.LogWarning("Some disks for instance %s on node %s were not"
9067                           " imported successfully" % (instance, pnode_name))
9068
9069       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9070         feedback_fn("* preparing remote import...")
9071         # The source cluster will stop the instance before attempting to make a
9072         # connection. In some cases stopping an instance can take a long time,
9073         # hence the shutdown timeout is added to the connection timeout.
9074         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9075                            self.op.source_shutdown_timeout)
9076         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9077
9078         assert iobj.primary_node == self.pnode.name
9079         disk_results = \
9080           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9081                                         self.source_x509_ca,
9082                                         self._cds, timeouts)
9083         if not compat.all(disk_results):
9084           # TODO: Should the instance still be started, even if some disks
9085           # failed to import (valid for local imports, too)?
9086           self.LogWarning("Some disks for instance %s on node %s were not"
9087                           " imported successfully" % (instance, pnode_name))
9088
9089         # Run rename script on newly imported instance
9090         assert iobj.name == instance
9091         feedback_fn("Running rename script for %s" % instance)
9092         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9093                                                    self.source_instance_name,
9094                                                    self.op.debug_level)
9095         if result.fail_msg:
9096           self.LogWarning("Failed to run rename script for %s on node"
9097                           " %s: %s" % (instance, pnode_name, result.fail_msg))
9098
9099       else:
9100         # also checked in the prereq part
9101         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9102                                      % self.op.mode)
9103
9104     if self.op.start:
9105       iobj.admin_up = True
9106       self.cfg.Update(iobj, feedback_fn)
9107       logging.info("Starting instance %s on node %s", instance, pnode_name)
9108       feedback_fn("* starting instance...")
9109       result = self.rpc.call_instance_start(pnode_name, iobj,
9110                                             None, None, False)
9111       result.Raise("Could not start instance")
9112
9113     return list(iobj.all_nodes)
9114
9115
class LUInstanceConsole(NoHooksLU):
  """Logical unit returning the console data of an instance.

  Instead of opening the console itself, this LU returns the
  description of the command that has to be run on the master node in
  order to connect to the console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expands and locks the instance given in the opcode.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance is looked up in the configuration and its primary node
    is checked for being online.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

  def Exec(self, feedback_fn):
    """Return the console information of a running instance.

    @raise errors.OpExecError: if the instance is not reported by its
        primary node

    """
    inst = self.instance
    pnode = inst.primary_node

    all_results = self.rpc.call_instance_list([pnode], [inst.hypervisor])
    running = all_results[pnode]
    running.Raise("Can't get node information from %s" % pnode)

    if inst.name in running.payload:
      logging.debug("Connecting to console of %s on %s", inst.name, pnode)
      return _GetInstanceConsole(self.cfg.GetClusterInfo(), inst)

    # The instance is not reported by the node; the state shown in the
    # error depends on whether it was supposed to be up
    if inst.admin_up:
      state = constants.INSTST_ERRORDOWN
    else:
      state = constants.INSTST_ADMINDOWN
    raise errors.OpExecError("Instance %s is not running (state %s)" %
                             (inst.name, state))
9162
9163
def _GetInstanceConsole(cluster, instance):
  """Builds the console description of an instance.

  @type cluster: L{objects.Cluster}
  @param cluster: cluster object used to fill the hypervisor and backend
      parameters of the instance
  @type instance: L{objects.Instance}
  @param instance: instance whose console information is wanted
  @rtype: dict
  @return: the console object converted with its C{ToDict} method

  """
  hv_obj = hypervisor.GetHypervisor(instance.hypervisor)
  # The filled hvparams/beparams are handed over as separate arguments
  # so that the instance object is not modified (which could lead to
  # the defaults being saved in the instance itself)
  console = hv_obj.GetInstanceConsole(instance,
                                      cluster.FillHV(instance),
                                      cluster.FillBE(instance))

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
9183
9184
class LUInstanceReplaceDisks(LogicalUnit):
  """Logical unit replacing the disks of an instance.

  The replacement itself is delegated to a L{TLReplaceDisks} tasklet
  created in L{ExpandNames}.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Checks the mode, remote node and iallocator combination.

    """
    opc = self.op
    TLReplaceDisks.CheckArguments(opc.mode, opc.remote_node, opc.iallocator)

  def ExpandNames(self):
    """Declares the needed locks and creates the replace tasklet.

    """
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is None:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                              self.op.iallocator, self.op.remote_node,
                              self.op.disks, False, self.op.early_release)
    self.replacer = replacer

    self.tasklets = [replacer]

  def DeclareLocks(self, level):
    """Computes the node group and node locks for the given level.

    """
    if level == locking.LEVEL_NODE:
      if self.op.iallocator is None:
        self._LockInstancesNodes()
        return

      assert self.op.remote_node is None
      assert not self.needed_locks[locking.LEVEL_NODE]

      # Lock member nodes of all locked groups
      member_nodes = []
      for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
        member_nodes.extend(self.cfg.GetNodeGroup(group_uuid).members)
      self.needed_locks[locking.LEVEL_NODE] = member_nodes

    elif level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment contains the replace mode and the new/old secondary
    node, updated with the generic per-instance hook environment.

    """
    inst = self.replacer.instance
    hook_env = {}
    hook_env["MODE"] = self.op.mode
    hook_env["NEW_SECONDARY"] = self.op.remote_node
    hook_env["OLD_SECONDARY"] = inst.secondary_nodes[0]
    hook_env.update(_BuildInstanceHookEnvByObject(self, inst))
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master node, primary node and, if given, the new
    remote node) is returned for both elements of the result.

    """
    inst = self.replacer.instance
    hook_nodes = [self.cfg.GetMasterNode(), inst.primary_node]
    if self.op.remote_node is not None:
      hook_nodes.append(self.op.remote_node)
    return hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    If node group locks are owned, the instance's node groups are
    verified against them before the generic check is run.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, groups)

    return LogicalUnit.CheckPrereq(self)
9291
9292
9293 class TLReplaceDisks(Tasklet):
9294   """Replaces disks for an instance.
9295
9296   Note: Locking is not within the scope of this class.
9297
9298   """
9299   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9300                disks, delay_iallocator, early_release):
9301     """Initializes this class.
9302
9303     """
9304     Tasklet.__init__(self, lu)
9305
9306     # Parameters
9307     self.instance_name = instance_name
9308     self.mode = mode
9309     self.iallocator_name = iallocator_name
9310     self.remote_node = remote_node
9311     self.disks = disks
9312     self.delay_iallocator = delay_iallocator
9313     self.early_release = early_release
9314
9315     # Runtime data
9316     self.instance = None
9317     self.new_node = None
9318     self.target_node = None
9319     self.other_node = None
9320     self.remote_node_info = None
9321     self.node_secondary_ip = None
9322
9323   @staticmethod
9324   def CheckArguments(mode, remote_node, iallocator):
9325     """Helper function for users of this class.
9326
9327     """
9328     # check for valid parameter combination
9329     if mode == constants.REPLACE_DISK_CHG:
9330       if remote_node is None and iallocator is None:
9331         raise errors.OpPrereqError("When changing the secondary either an"
9332                                    " iallocator script must be used or the"
9333                                    " new node given", errors.ECODE_INVAL)
9334
9335       if remote_node is not None and iallocator is not None:
9336         raise errors.OpPrereqError("Give either the iallocator or the new"
9337                                    " secondary, not both", errors.ECODE_INVAL)
9338
9339     elif remote_node is not None or iallocator is not None:
9340       # Not replacing the secondary
9341       raise errors.OpPrereqError("The iallocator and new node options can"
9342                                  " only be used when changing the"
9343                                  " secondary node", errors.ECODE_INVAL)
9344
9345   @staticmethod
9346   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9347     """Compute a new secondary node using an IAllocator.
9348
9349     """
9350     ial = IAllocator(lu.cfg, lu.rpc,
9351                      mode=constants.IALLOCATOR_MODE_RELOC,
9352                      name=instance_name,
9353                      relocate_from=list(relocate_from))
9354
9355     ial.Run(iallocator_name)
9356
9357     if not ial.success:
9358       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9359                                  " %s" % (iallocator_name, ial.info),
9360                                  errors.ECODE_NORES)
9361
9362     if len(ial.result) != ial.required_nodes:
9363       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9364                                  " of nodes (%s), required %s" %
9365                                  (iallocator_name,
9366                                   len(ial.result), ial.required_nodes),
9367                                  errors.ECODE_FAULT)
9368
9369     remote_node_name = ial.result[0]
9370
9371     lu.LogInfo("Selected new secondary for instance '%s': %s",
9372                instance_name, remote_node_name)
9373
9374     return remote_node_name
9375
9376   def _FindFaultyDisks(self, node_name):
9377     """Wrapper for L{_FindFaultyInstanceDisks}.
9378
9379     """
9380     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9381                                     node_name, True)
9382
9383   def _CheckDisksActivated(self, instance):
9384     """Checks if the instance disks are activated.
9385
9386     @param instance: The instance to check disks
9387     @return: True if they are activated, False otherwise
9388
9389     """
9390     nodes = instance.all_nodes
9391
9392     for idx, dev in enumerate(instance.disks):
9393       for node in nodes:
9394         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9395         self.cfg.SetDiskID(dev, node)
9396
9397         result = self.rpc.call_blockdev_find(node, dev)
9398
9399         if result.offline:
9400           continue
9401         elif result.fail_msg or not result.payload:
9402           return False
9403
9404     return True
9405
9406   def CheckPrereq(self):
9407     """Check prerequisites.
9408
9409     This checks that the instance is in the cluster.
9410
9411     """
9412     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9413     assert instance is not None, \
9414       "Cannot retrieve locked instance %s" % self.instance_name
9415
9416     if instance.disk_template != constants.DT_DRBD8:
9417       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9418                                  " instances", errors.ECODE_INVAL)
9419
9420     if len(instance.secondary_nodes) != 1:
9421       raise errors.OpPrereqError("The instance has a strange layout,"
9422                                  " expected one secondary but found %d" %
9423                                  len(instance.secondary_nodes),
9424                                  errors.ECODE_FAULT)
9425
9426     if not self.delay_iallocator:
9427       self._CheckPrereq2()
9428
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    Besides validating the request, this determines the nodes taking part in
    the operation (C{self.target_node}, C{self.other_node} and, when changing
    the secondary, C{self.new_node}), fills C{self.disks} with the indices of
    the disks to replace, releases the node and node group locks which are no
    longer needed and stores the secondary IP addresses of the touched nodes
    in C{self.node_secondary_ip}.

    @raise errors.OpPrereqError: if the new node or the requested disks are
        not acceptable, or if an automatic repair is not possible
    @raise errors.ProgrammerError: if the replace mode is not handled

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    # The new secondary is either given explicitly or computed by the
    # iallocator; it stays None when neither was requested
    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    # The new secondary must differ from both nodes currently in use
    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    # In automatic and change-secondary mode the disks are chosen by this
    # code, hence an explicit list is rejected
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      # Faulty disks on both sides can not be repaired automatically
      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      # The node with the faulty disks becomes the target, its peer the
      # "other" node
      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        # Nothing is faulty; Exec reports that no disks need replacement
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        # The old secondary (target node) is deliberately not checked for
        # being online, see the handling of an offline old secondary below
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # NOTE(review): self.new_node is only assigned above in change-secondary
    # mode (and target/other node not at all when nothing is faulty in
    # automatic mode); they are assumed to be initialised to None elsewhere
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
9558
9559   def Exec(self, feedback_fn):
9560     """Execute disk replacement.
9561
9562     This dispatches the disk replacement to the appropriate handler.
9563
9564     """
9565     if self.delay_iallocator:
9566       self._CheckPrereq2()
9567
9568     if __debug__:
9569       # Verify owned locks before starting operation
9570       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9571       assert set(owned_nodes) == set(self.node_secondary_ip), \
9572           ("Incorrect node locks, owning %s, expected %s" %
9573            (owned_nodes, self.node_secondary_ip.keys()))
9574
9575       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9576       assert list(owned_instances) == [self.instance_name], \
9577           "Instance '%s' not locked" % self.instance_name
9578
9579       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9580           "Should not own any node group lock at this point"
9581
9582     if not self.disks:
9583       feedback_fn("No disks need replacement")
9584       return
9585
9586     feedback_fn("Replacing disk(s) %s for %s" %
9587                 (utils.CommaJoin(self.disks), self.instance.name))
9588
9589     activate_disks = (not self.instance.admin_up)
9590
9591     # Activate the instance disks if we're replacing them on a down instance
9592     if activate_disks:
9593       _StartInstanceDisks(self.lu, self.instance, True)
9594
9595     try:
9596       # Should we replace the secondary node?
9597       if self.new_node is not None:
9598         fn = self._ExecDrbd8Secondary
9599       else:
9600         fn = self._ExecDrbd8DiskOnly
9601
9602       result = fn(feedback_fn)
9603     finally:
9604       # Deactivate the instance disks if we're replacing them on a
9605       # down instance
9606       if activate_disks:
9607         _SafeShutdownInstanceDisks(self.lu, self.instance)
9608
9609     if __debug__:
9610       # Verify owned locks
9611       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9612       nodes = frozenset(self.node_secondary_ip)
9613       assert ((self.early_release and not owned_nodes) or
9614               (not self.early_release and not (set(owned_nodes) - nodes))), \
9615         ("Not owning the correct locks, early_release=%s, owned=%r,"
9616          " nodes=%r" % (self.early_release, owned_nodes, nodes))
9617
9618     return result
9619
9620   def _CheckVolumeGroup(self, nodes):
9621     self.lu.LogInfo("Checking volume groups")
9622
9623     vgname = self.cfg.GetVGName()
9624
9625     # Make sure volume group exists on all involved nodes
9626     results = self.rpc.call_vg_list(nodes)
9627     if not results:
9628       raise errors.OpExecError("Can't list volume groups on the nodes")
9629
9630     for node in nodes:
9631       res = results[node]
9632       res.Raise("Error checking node %s" % node)
9633       if vgname not in res.payload:
9634         raise errors.OpExecError("Volume group '%s' not found on node %s" %
9635                                  (vgname, node))
9636
9637   def _CheckDisksExistence(self, nodes):
9638     # Check disk existence
9639     for idx, dev in enumerate(self.instance.disks):
9640       if idx not in self.disks:
9641         continue
9642
9643       for node in nodes:
9644         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9645         self.cfg.SetDiskID(dev, node)
9646
9647         result = self.rpc.call_blockdev_find(node, dev)
9648
9649         msg = result.fail_msg
9650         if msg or not result.payload:
9651           if not msg:
9652             msg = "disk not found"
9653           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9654                                    (idx, node, msg))
9655
9656   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9657     for idx, dev in enumerate(self.instance.disks):
9658       if idx not in self.disks:
9659         continue
9660
9661       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9662                       (idx, node_name))
9663
9664       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9665                                    ldisk=ldisk):
9666         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9667                                  " replace disks for instance %s" %
9668                                  (node_name, self.instance.name))
9669
9670   def _CreateNewStorage(self, node_name):
9671     """Create new storage on the primary or secondary node.
9672
9673     This is only used for same-node replaces, not for changing the
9674     secondary node, hence we don't want to modify the existing disk.
9675
9676     """
9677     iv_names = {}
9678
9679     for idx, dev in enumerate(self.instance.disks):
9680       if idx not in self.disks:
9681         continue
9682
9683       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9684
9685       self.cfg.SetDiskID(dev, node_name)
9686
9687       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9688       names = _GenerateUniqueNames(self.lu, lv_names)
9689
9690       vg_data = dev.children[0].logical_id[0]
9691       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9692                              logical_id=(vg_data, names[0]))
9693       vg_meta = dev.children[1].logical_id[0]
9694       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9695                              logical_id=(vg_meta, names[1]))
9696
9697       new_lvs = [lv_data, lv_meta]
9698       old_lvs = [child.Copy() for child in dev.children]
9699       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9700
9701       # we pass force_create=True to force the LVM creation
9702       for new_lv in new_lvs:
9703         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9704                         _GetInstanceInfoText(self.instance), False)
9705
9706     return iv_names
9707
9708   def _CheckDevices(self, node_name, iv_names):
9709     for name, (dev, _, _) in iv_names.iteritems():
9710       self.cfg.SetDiskID(dev, node_name)
9711
9712       result = self.rpc.call_blockdev_find(node_name, dev)
9713
9714       msg = result.fail_msg
9715       if msg or not result.payload:
9716         if not msg:
9717           msg = "disk not found"
9718         raise errors.OpExecError("Can't find DRBD device %s: %s" %
9719                                  (name, msg))
9720
9721       if result.payload.is_degraded:
9722         raise errors.OpExecError("DRBD device %s is degraded!" % name)
9723
9724   def _RemoveOldStorage(self, node_name, iv_names):
9725     for name, (_, old_lvs, _) in iv_names.iteritems():
9726       self.lu.LogInfo("Remove logical volumes for %s" % name)
9727
9728       for lv in old_lvs:
9729         self.cfg.SetDiskID(lv, node_name)
9730
9731         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9732         if msg:
9733           self.lu.LogWarning("Can't remove old LV: %s" % msg,
9734                              hint="remove unused LVs manually")
9735
9736   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9737     """Replace a disk on the primary or secondary for DRBD 8.
9738
9739     The algorithm for replace is quite complicated:
9740
9741       1. for each disk to be replaced:
9742
9743         1. create new LVs on the target node with unique names
9744         1. detach old LVs from the drbd device
9745         1. rename old LVs to name_replaced.<time_t>
9746         1. rename new LVs to old LVs
9747         1. attach the new LVs (with the old names now) to the drbd device
9748
9749       1. wait for sync across all devices
9750
9751       1. for each modified disk:
9752
9753         1. remove old LVs (which have the name name_replaces.<time_t>)
9754
9755     Failures are not very well handled.
9756
9757     """
9758     steps_total = 6
9759
9760     # Step: check device activation
9761     self.lu.LogStep(1, steps_total, "Check device existence")
9762     self._CheckDisksExistence([self.other_node, self.target_node])
9763     self._CheckVolumeGroup([self.target_node, self.other_node])
9764
9765     # Step: check other node consistency
9766     self.lu.LogStep(2, steps_total, "Check peer consistency")
9767     self._CheckDisksConsistency(self.other_node,
9768                                 self.other_node == self.instance.primary_node,
9769                                 False)
9770
9771     # Step: create new storage
9772     self.lu.LogStep(3, steps_total, "Allocate new storage")
9773     iv_names = self._CreateNewStorage(self.target_node)
9774
9775     # Step: for each lv, detach+rename*2+attach
9776     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9777     for dev, old_lvs, new_lvs in iv_names.itervalues():
9778       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9779
9780       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9781                                                      old_lvs)
9782       result.Raise("Can't detach drbd from local storage on node"
9783                    " %s for device %s" % (self.target_node, dev.iv_name))
9784       #dev.children = []
9785       #cfg.Update(instance)
9786
9787       # ok, we created the new LVs, so now we know we have the needed
9788       # storage; as such, we proceed on the target node to rename
9789       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9790       # using the assumption that logical_id == physical_id (which in
9791       # turn is the unique_id on that node)
9792
9793       # FIXME(iustin): use a better name for the replaced LVs
9794       temp_suffix = int(time.time())
9795       ren_fn = lambda d, suff: (d.physical_id[0],
9796                                 d.physical_id[1] + "_replaced-%s" % suff)
9797
9798       # Build the rename list based on what LVs exist on the node
9799       rename_old_to_new = []
9800       for to_ren in old_lvs:
9801         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9802         if not result.fail_msg and result.payload:
9803           # device exists
9804           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9805
9806       self.lu.LogInfo("Renaming the old LVs on the target node")
9807       result = self.rpc.call_blockdev_rename(self.target_node,
9808                                              rename_old_to_new)
9809       result.Raise("Can't rename old LVs on node %s" % self.target_node)
9810
9811       # Now we rename the new LVs to the old LVs
9812       self.lu.LogInfo("Renaming the new LVs on the target node")
9813       rename_new_to_old = [(new, old.physical_id)
9814                            for old, new in zip(old_lvs, new_lvs)]
9815       result = self.rpc.call_blockdev_rename(self.target_node,
9816                                              rename_new_to_old)
9817       result.Raise("Can't rename new LVs on node %s" % self.target_node)
9818
9819       # Intermediate steps of in memory modifications
9820       for old, new in zip(old_lvs, new_lvs):
9821         new.logical_id = old.logical_id
9822         self.cfg.SetDiskID(new, self.target_node)
9823
9824       # We need to modify old_lvs so that removal later removes the
9825       # right LVs, not the newly added ones; note that old_lvs is a
9826       # copy here
9827       for disk in old_lvs:
9828         disk.logical_id = ren_fn(disk, temp_suffix)
9829         self.cfg.SetDiskID(disk, self.target_node)
9830
9831       # Now that the new lvs have the old name, we can add them to the device
9832       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9833       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9834                                                   new_lvs)
9835       msg = result.fail_msg
9836       if msg:
9837         for new_lv in new_lvs:
9838           msg2 = self.rpc.call_blockdev_remove(self.target_node,
9839                                                new_lv).fail_msg
9840           if msg2:
9841             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9842                                hint=("cleanup manually the unused logical"
9843                                      "volumes"))
9844         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9845
9846     cstep = 5
9847     if self.early_release:
9848       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9849       cstep += 1
9850       self._RemoveOldStorage(self.target_node, iv_names)
9851       # WARNING: we release both node locks here, do not do other RPCs
9852       # than WaitForSync to the primary node
9853       _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9854                     names=[self.target_node, self.other_node])
9855
9856     # Wait for sync
9857     # This can fail as the old devices are degraded and _WaitForSync
9858     # does a combined result over all disks, so we don't check its return value
9859     self.lu.LogStep(cstep, steps_total, "Sync devices")
9860     cstep += 1
9861     _WaitForSync(self.lu, self.instance)
9862
9863     # Check all devices manually
9864     self._CheckDevices(self.instance.primary_node, iv_names)
9865
9866     # Step: remove old storage
9867     if not self.early_release:
9868       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9869       cstep += 1
9870       self._RemoveOldStorage(self.target_node, iv_names)
9871
9872   def _ExecDrbd8Secondary(self, feedback_fn):
9873     """Replace the secondary node for DRBD 8.
9874
9875     The algorithm for replace is quite complicated:
9876       - for all disks of the instance:
9877         - create new LVs on the new node with same names
9878         - shutdown the drbd device on the old secondary
9879         - disconnect the drbd network on the primary
9880         - create the drbd device on the new secondary
9881         - network attach the drbd on the primary, using an artifice:
9882           the drbd code for Attach() will connect to the network if it
9883           finds a device which is connected to the good local disks but
9884           not network enabled
9885       - wait for sync across all devices
9886       - remove all disks from the old secondary
9887
9888     Failures are not very well handled.
9889
9890     """
9891     steps_total = 6
9892
9893     pnode = self.instance.primary_node
9894
9895     # Step: check device activation
9896     self.lu.LogStep(1, steps_total, "Check device existence")
9897     self._CheckDisksExistence([self.instance.primary_node])
9898     self._CheckVolumeGroup([self.instance.primary_node])
9899
9900     # Step: check other node consistency
9901     self.lu.LogStep(2, steps_total, "Check peer consistency")
9902     self._CheckDisksConsistency(self.instance.primary_node, True, True)
9903
9904     # Step: create new storage
9905     self.lu.LogStep(3, steps_total, "Allocate new storage")
9906     for idx, dev in enumerate(self.instance.disks):
9907       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9908                       (self.new_node, idx))
9909       # we pass force_create=True to force LVM creation
9910       for new_lv in dev.children:
9911         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9912                         _GetInstanceInfoText(self.instance), False)
9913
9914     # Step 4: dbrd minors and drbd setups changes
9915     # after this, we must manually remove the drbd minors on both the
9916     # error and the success paths
9917     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9918     minors = self.cfg.AllocateDRBDMinor([self.new_node
9919                                          for dev in self.instance.disks],
9920                                         self.instance.name)
9921     logging.debug("Allocated minors %r", minors)
9922
9923     iv_names = {}
9924     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9925       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9926                       (self.new_node, idx))
9927       # create new devices on new_node; note that we create two IDs:
9928       # one without port, so the drbd will be activated without
9929       # networking information on the new node at this stage, and one
9930       # with network, for the latter activation in step 4
9931       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9932       if self.instance.primary_node == o_node1:
9933         p_minor = o_minor1
9934       else:
9935         assert self.instance.primary_node == o_node2, "Three-node instance?"
9936         p_minor = o_minor2
9937
9938       new_alone_id = (self.instance.primary_node, self.new_node, None,
9939                       p_minor, new_minor, o_secret)
9940       new_net_id = (self.instance.primary_node, self.new_node, o_port,
9941                     p_minor, new_minor, o_secret)
9942
9943       iv_names[idx] = (dev, dev.children, new_net_id)
9944       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9945                     new_net_id)
9946       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9947                               logical_id=new_alone_id,
9948                               children=dev.children,
9949                               size=dev.size)
9950       try:
9951         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9952                               _GetInstanceInfoText(self.instance), False)
9953       except errors.GenericError:
9954         self.cfg.ReleaseDRBDMinors(self.instance.name)
9955         raise
9956
9957     # We have new devices, shutdown the drbd on the old secondary
9958     for idx, dev in enumerate(self.instance.disks):
9959       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9960       self.cfg.SetDiskID(dev, self.target_node)
9961       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9962       if msg:
9963         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9964                            "node: %s" % (idx, msg),
9965                            hint=("Please cleanup this device manually as"
9966                                  " soon as possible"))
9967
9968     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9969     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
9970                                                self.instance.disks)[pnode]
9971
9972     msg = result.fail_msg
9973     if msg:
9974       # detaches didn't succeed (unlikely)
9975       self.cfg.ReleaseDRBDMinors(self.instance.name)
9976       raise errors.OpExecError("Can't detach the disks from the network on"
9977                                " old node: %s" % (msg,))
9978
9979     # if we managed to detach at least one, we update all the disks of
9980     # the instance to point to the new secondary
9981     self.lu.LogInfo("Updating instance configuration")
9982     for dev, _, new_logical_id in iv_names.itervalues():
9983       dev.logical_id = new_logical_id
9984       self.cfg.SetDiskID(dev, self.instance.primary_node)
9985
9986     self.cfg.Update(self.instance, feedback_fn)
9987
9988     # and now perform the drbd attach
9989     self.lu.LogInfo("Attaching primary drbds to new secondary"
9990                     " (standalone => connected)")
9991     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9992                                             self.new_node],
9993                                            self.node_secondary_ip,
9994                                            self.instance.disks,
9995                                            self.instance.name,
9996                                            False)
9997     for to_node, to_result in result.items():
9998       msg = to_result.fail_msg
9999       if msg:
10000         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10001                            to_node, msg,
10002                            hint=("please do a gnt-instance info to see the"
10003                                  " status of disks"))
10004     cstep = 5
10005     if self.early_release:
10006       self.lu.LogStep(cstep, steps_total, "Removing old storage")
10007       cstep += 1
10008       self._RemoveOldStorage(self.target_node, iv_names)
10009       # WARNING: we release all node locks here, do not do other RPCs
10010       # than WaitForSync to the primary node
10011       _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10012                     names=[self.instance.primary_node,
10013                            self.target_node,
10014                            self.new_node])
10015
10016     # Wait for sync
10017     # This can fail as the old devices are degraded and _WaitForSync
10018     # does a combined result over all disks, so we don't check its return value
10019     self.lu.LogStep(cstep, steps_total, "Sync devices")
10020     cstep += 1
10021     _WaitForSync(self.lu, self.instance)
10022
10023     # Check all devices manually
10024     self._CheckDevices(self.instance.primary_node, iv_names)
10025
10026     # Step: remove old storage
10027     if not self.early_release:
10028       self.lu.LogStep(cstep, steps_total, "Removing old storage")
10029       self._RemoveOldStorage(self.target_node, iv_names)
10030
10031
10032 class LURepairNodeStorage(NoHooksLU):
10033   """Repairs the volume group on a node.
10034
10035   """
10036   REQ_BGL = False
10037
10038   def CheckArguments(self):
10039     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10040
10041     storage_type = self.op.storage_type
10042
10043     if (constants.SO_FIX_CONSISTENCY not in
10044         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10045       raise errors.OpPrereqError("Storage units of type '%s' can not be"
10046                                  " repaired" % storage_type,
10047                                  errors.ECODE_INVAL)
10048
10049   def ExpandNames(self):
10050     self.needed_locks = {
10051       locking.LEVEL_NODE: [self.op.node_name],
10052       }
10053
10054   def _CheckFaultyDisks(self, instance, node_name):
10055     """Ensure faulty disks abort the opcode or at least warn."""
10056     try:
10057       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10058                                   node_name, True):
10059         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10060                                    " node '%s'" % (instance.name, node_name),
10061                                    errors.ECODE_STATE)
10062     except errors.OpPrereqError, err:
10063       if self.op.ignore_consistency:
10064         self.proc.LogWarning(str(err.args[0]))
10065       else:
10066         raise
10067
10068   def CheckPrereq(self):
10069     """Check prerequisites.
10070
10071     """
10072     # Check whether any instance on this node has faulty disks
10073     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10074       if not inst.admin_up:
10075         continue
10076       check_nodes = set(inst.all_nodes)
10077       check_nodes.discard(self.op.node_name)
10078       for inst_node_name in check_nodes:
10079         self._CheckFaultyDisks(inst, inst_node_name)
10080
10081   def Exec(self, feedback_fn):
10082     feedback_fn("Repairing storage unit '%s' on %s ..." %
10083                 (self.op.name, self.op.node_name))
10084
10085     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10086     result = self.rpc.call_storage_execute(self.op.node_name,
10087                                            self.op.storage_type, st_args,
10088                                            self.op.name,
10089                                            constants.SO_FIX_CONSISTENCY)
10090     result.Raise("Failed to repair storage unit '%s' on %s" %
10091                  (self.op.name, self.op.node_name))
10092
10093
10094 class LUNodeEvacuate(NoHooksLU):
10095   """Evacuates instances off a list of nodes.
10096
10097   """
10098   REQ_BGL = False
10099
  def CheckArguments(self):
    """Check the opcode's C{iallocator} and C{remote_node} parameters.

    The actual check is delegated to L{_CheckIAllocatorOrNode}.
    NOTE(review): presumably this ensures that an iallocator or a remote node
    is given, but not both -- confirm against the helper.

    """
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10102
10103   def ExpandNames(self):
10104     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10105
10106     if self.op.remote_node is not None:
10107       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10108       assert self.op.remote_node
10109
10110       if self.op.remote_node == self.op.node_name:
10111         raise errors.OpPrereqError("Can not use evacuated node as a new"
10112                                    " secondary node", errors.ECODE_INVAL)
10113
10114       if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10115         raise errors.OpPrereqError("Without the use of an iallocator only"
10116                                    " secondary instances can be evacuated",
10117                                    errors.ECODE_INVAL)
10118
10119     # Declare locks
10120     self.share_locks = _ShareAll()
10121     self.needed_locks = {
10122       locking.LEVEL_INSTANCE: [],
10123       locking.LEVEL_NODEGROUP: [],
10124       locking.LEVEL_NODE: [],
10125       }
10126
10127     # Determine nodes (via group) optimistically, needs verification once locks
10128     # have been acquired
10129     self.lock_nodes = self._DetermineNodes()
10130
10131   def _DetermineNodes(self):
10132     """Gets the list of nodes to operate on.
10133
10134     """
10135     if self.op.remote_node is None:
10136       # Iallocator will choose any node(s) in the same group
10137       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10138     else:
10139       group_nodes = frozenset([self.op.remote_node])
10140
10141     # Determine nodes to be locked
10142     return set([self.op.node_name]) | group_nodes
10143
10144   def _DetermineInstances(self):
10145     """Builds list of instances to operate on.
10146
10147     """
10148     assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10149
10150     if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10151       # Primary instances only
10152       inst_fn = _GetNodePrimaryInstances
10153       assert self.op.remote_node is None, \
10154         "Evacuating primary instances requires iallocator"
10155     elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10156       # Secondary instances only
10157       inst_fn = _GetNodeSecondaryInstances
10158     else:
10159       # All instances
10160       assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10161       inst_fn = _GetNodeInstances
10162       # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10163       # per instance
10164       raise errors.OpPrereqError("Due to an issue with the iallocator"
10165                                  " interface it is not possible to evacuate"
10166                                  " all instances at once; specify explicitly"
10167                                  " whether to evacuate primary or secondary"
10168                                  " instances",
10169                                  errors.ECODE_INVAL)
10170
10171     return inst_fn(self.cfg, self.op.node_name)
10172
10173   def DeclareLocks(self, level):
10174     if level == locking.LEVEL_INSTANCE:
10175       # Lock instances optimistically, needs verification once node and group
10176       # locks have been acquired
10177       self.needed_locks[locking.LEVEL_INSTANCE] = \
10178         set(i.name for i in self._DetermineInstances())
10179
10180     elif level == locking.LEVEL_NODEGROUP:
10181       # Lock node groups for all potential target nodes optimistically, needs
10182       # verification once nodes have been acquired
10183       self.needed_locks[locking.LEVEL_NODEGROUP] = \
10184         self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10185
10186     elif level == locking.LEVEL_NODE:
10187       self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10188
10189   def CheckPrereq(self):
10190     # Verify locks
10191     owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10192     owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10193     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10194
10195     need_nodes = self._DetermineNodes()
10196
10197     if not owned_nodes.issuperset(need_nodes):
10198       raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
10199                                  " locks were acquired, current nodes are"
10200                                  " are '%s', used to be '%s'; retry the"
10201                                  " operation" %
10202                                  (self.op.node_name,
10203                                   utils.CommaJoin(need_nodes),
10204                                   utils.CommaJoin(owned_nodes)),
10205                                  errors.ECODE_STATE)
10206
10207     wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10208     if owned_groups != wanted_groups:
10209       raise errors.OpExecError("Node groups changed since locks were acquired,"
10210                                " current groups are '%s', used to be '%s';"
10211                                " retry the operation" %
10212                                (utils.CommaJoin(wanted_groups),
10213                                 utils.CommaJoin(owned_groups)))
10214
10215     # Determine affected instances
10216     self.instances = self._DetermineInstances()
10217     self.instance_names = [i.name for i in self.instances]
10218
10219     if set(self.instance_names) != owned_instances:
10220       raise errors.OpExecError("Instances on node '%s' changed since locks"
10221                                " were acquired, current instances are '%s',"
10222                                " used to be '%s'; retry the operation" %
10223                                (self.op.node_name,
10224                                 utils.CommaJoin(self.instance_names),
10225                                 utils.CommaJoin(owned_instances)))
10226
10227     if self.instance_names:
10228       self.LogInfo("Evacuating instances from node '%s': %s",
10229                    self.op.node_name,
10230                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
10231     else:
10232       self.LogInfo("No instances to evacuate from node '%s'",
10233                    self.op.node_name)
10234
10235     if self.op.remote_node is not None:
10236       for i in self.instances:
10237         if i.primary_node == self.op.remote_node:
10238           raise errors.OpPrereqError("Node %s is the primary node of"
10239                                      " instance %s, cannot use it as"
10240                                      " secondary" %
10241                                      (self.op.remote_node, i.name),
10242                                      errors.ECODE_INVAL)
10243
10244   def Exec(self, feedback_fn):
10245     assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10246
10247     if not self.instance_names:
10248       # No instances to evacuate
10249       jobs = []
10250
10251     elif self.op.iallocator is not None:
10252       # TODO: Implement relocation to other group
10253       ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10254                        evac_mode=self.op.mode,
10255                        instances=list(self.instance_names))
10256
10257       ial.Run(self.op.iallocator)
10258
10259       if not ial.success:
10260         raise errors.OpPrereqError("Can't compute node evacuation using"
10261                                    " iallocator '%s': %s" %
10262                                    (self.op.iallocator, ial.info),
10263                                    errors.ECODE_NORES)
10264
10265       jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10266
10267     elif self.op.remote_node is not None:
10268       assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10269       jobs = [
10270         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10271                                         remote_node=self.op.remote_node,
10272                                         disks=[],
10273                                         mode=constants.REPLACE_DISK_CHG,
10274                                         early_release=self.op.early_release)]
10275         for instance_name in self.instance_names
10276         ]
10277
10278     else:
10279       raise errors.ProgrammerError("No iallocator or remote node")
10280
10281     return ResultWithJobs(jobs)
10282
10283
10284 def _SetOpEarlyRelease(early_release, op):
10285   """Sets C{early_release} flag on opcodes if available.
10286
10287   """
10288   try:
10289     op.early_release = early_release
10290   except AttributeError:
10291     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10292
10293   return op
10294
10295
10296 def _NodeEvacDest(use_nodes, group, nodes):
10297   """Returns group or nodes depending on caller's choice.
10298
10299   """
10300   if use_nodes:
10301     return utils.CommaJoin(nodes)
10302   else:
10303     return group
10304
10305
10306 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10307   """Unpacks the result of change-group and node-evacuate iallocator requests.
10308
10309   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10310   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10311
10312   @type lu: L{LogicalUnit}
10313   @param lu: Logical unit instance
10314   @type alloc_result: tuple/list
10315   @param alloc_result: Result from iallocator
10316   @type early_release: bool
10317   @param early_release: Whether to release locks early if possible
10318   @type use_nodes: bool
10319   @param use_nodes: Whether to display node names instead of groups
10320
10321   """
10322   (moved, failed, jobs) = alloc_result
10323
10324   if failed:
10325     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
10326                                  for (name, reason) in failed)
10327     lu.LogWarning("Unable to evacuate instances %s", failreason)
10328     raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
10329
10330   if moved:
10331     lu.LogInfo("Instances to be moved: %s",
10332                utils.CommaJoin("%s (to %s)" %
10333                                (name, _NodeEvacDest(use_nodes, group, nodes))
10334                                for (name, group, nodes) in moved))
10335
10336   return [map(compat.partial(_SetOpEarlyRelease, early_release),
10337               map(opcodes.OpCode.LoadOpCode, ops))
10338           for ops in jobs]
10339
10340
class LUInstanceGrowDisk(LogicalUnit):
  """Logical unit growing one disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance and prepares the node lock recalculation.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declares the instance's node locks at the node level.

    """
    if level != locking.LEVEL_NODE:
      return

    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    @rtype: dict
    @return: The disk and amount from the opcode plus the instance environment

    """
    env = dict(DISK=self.op.disk, AMOUNT=self.op.amount)
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: Tuple using the same list (master node followed by all nodes of
      the instance) for both elements

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that all its nodes are
    online, that its disk template can be grown and, except for file-based
    templates, that the nodes have enough free disk space.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    inst_nodes = list(inst.all_nodes)
    for node_name in inst_nodes:
      _CheckNodeOnline(self, node_name)

    self.instance = inst

    if inst.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = inst.FindDisk(self.op.disk)

    if inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      return

    _CheckNodesFreeDiskPerVG(self, inst_nodes,
                             self.disk.ComputeGrowth(self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    The grow request is first sent to all nodes in dry-run mode and only then
    for real; afterwards the new size is recorded in the configuration.

    """
    inst = self.instance
    grow_disk = self.disk

    (disks_ok, _) = _AssembleInstanceDisks(self, self.instance,
                                           disks=[grow_disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    # First pass runs all grow ops in dry-run mode; once we know that (as far
    # as we can test) operations across different nodes will succeed, the
    # second pass runs them for real
    for dry_run in (True, False):
      for node in inst.all_nodes:
        self.cfg.SetDiskID(grow_disk, node)
        result = self.rpc.call_blockdev_grow(node, grow_disk, self.op.amount,
                                             dry_run)
        result.Raise("Grow request failed to node %s" % node)

        if not dry_run:
          # TODO: Rewrite code to work properly
          # DRBD goes into sync mode for a short amount of time after
          # executing the "resize" command. DRBD 8.x below version 8.0.13
          # contains a bug whereby calling "resize" in sync mode fails.
          # Sleeping for a short amount of time is a work-around.
          time.sleep(5)

    grow_disk.RecordGrow(self.op.amount)
    self.cfg.Update(inst, feedback_fn)

    if not self.op.wait_for_sync:
      if not inst.admin_up:
        self.proc.LogWarning("Not shutting down the disk even if the instance"
                             " is not supposed to be running because no wait"
                             " for sync mode was requested")
      return

    synced = _WaitForSync(self, inst, disks=[grow_disk])
    if not synced:
      self.proc.LogWarning("Disk sync-ing has not returned a good"
                           " status; please check the instance")

    if not inst.admin_up:
      _SafeShutdownInstanceDisks(self, inst, disks=[grow_disk])
10450
10451
class LUInstanceQueryData(NoHooksLU):
  """Logical unit returning configuration and runtime data of instances.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Computes the wanted instance names and the locks to acquire.

    """
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not self.op.static and not self.op.use_locking:
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if not self.op.instances and self.op.use_locking:
      # Will use acquired locks
      self.wanted_names = None
    else:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)

    if not self.op.use_locking:
      return

    self.share_locks = _ShareAll()

    if self.wanted_names is None:
      instance_locks = locking.ALL_SET
    else:
      instance_locks = self.wanted_names
    self.needed_locks[locking.LEVEL_INSTANCE] = instance_locks

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declares the instances' node locks if locking is used.

    """
    if not self.op.use_locking:
      return

    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    instance_info = self.cfg.GetMultiInstanceInfo(self.wanted_names)
    self.wanted_instances = [compat.snd(entry) for entry in instance_info]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    @return: C{None} for static queries, when no node was given, when the RPC
      result is flagged offline or when it has no payload; otherwise a tuple
      of device path, major, minor, sync percentage, estimated time, degraded
      flag and local disk status

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    find_result = self.rpc.call_blockdev_find(node, dev)
    if find_result.offline:
      return None

    find_result.Raise("Can't compute disk status for %s" % instance_name)

    bdev_status = find_result.payload
    if bdev_status is None:
      return None

    status_fields = ["dev_path", "major", "minor", "sync_percent",
                     "estimated_time", "is_degraded", "ldisk_status"]

    return tuple([getattr(bdev_status, name) for name in status_fields])

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    Calls itself for all children of the device.

    @return: Dictionary describing the device, its status on the primary and
      secondary node and its children

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      drbd_id = dev.logical_id
      if drbd_id[0] == instance.primary_node:
        snode = drbd_id[1]
      else:
        snode = drbd_id[0]

    primary_status = self._ComputeBlockdevStatus(instance.primary_node,
                                                 instance.name, dev)
    secondary_status = self._ComputeBlockdevStatus(snode, instance.name, dev)

    child_status = []
    if dev.children:
      for child in dev.children:
        child_status.append(self._ComputeDiskStatus(instance, snode, child))

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": primary_status,
      "sstatus": secondary_status,
      "children": child_status,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    cluster = self.cfg.GetClusterInfo()

    primary_names = [inst.primary_node for inst in self.wanted_instances]
    pri_nodes = self.cfg.GetMultiNodeInfo(primary_names)

    result = {}

    for (instance, (_, pnode)) in zip(self.wanted_instances, pri_nodes):
      # The run state is only queried for non-static requests on online nodes
      remote_state = None
      if pnode.offline:
        self.LogWarning("Primary node %s is marked offline, returning static"
                        " information only for instance %s" %
                        (pnode.name, instance.name))
      elif not self.op.static:
        info_result = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        info_result.Raise("Error checking node %s" % instance.primary_node)
        info = info_result.payload
        if info and "state" in info:
          remote_state = "up"
        else:
          remote_state = "down"

      config_state = "down"
      if instance.admin_up:
        config_state = "up"

      disk_status = [self._ComputeDiskStatus(instance, None, disk)
                     for disk in instance.disks]

      result[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disk_status,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result
10618
10619
10620 class LUInstanceSetParams(LogicalUnit):
10621   """Modifies an instances's parameters.
10622
10623   """
10624   HPATH = "instance-modify"
10625   HTYPE = constants.HTYPE_INSTANCE
10626   REQ_BGL = False
10627
10628   def CheckArguments(self):
10629     if not (self.op.nics or self.op.disks or self.op.disk_template or
10630             self.op.hvparams or self.op.beparams or self.op.os_name):
10631       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10632
10633     if self.op.hvparams:
10634       _CheckGlobalHvParams(self.op.hvparams)
10635
10636     # Disk validation
10637     disk_addremove = 0
10638     for disk_op, disk_dict in self.op.disks:
10639       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10640       if disk_op == constants.DDM_REMOVE:
10641         disk_addremove += 1
10642         continue
10643       elif disk_op == constants.DDM_ADD:
10644         disk_addremove += 1
10645       else:
10646         if not isinstance(disk_op, int):
10647           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10648         if not isinstance(disk_dict, dict):
10649           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10650           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10651
10652       if disk_op == constants.DDM_ADD:
10653         mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10654         if mode not in constants.DISK_ACCESS_SET:
10655           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10656                                      errors.ECODE_INVAL)
10657         size = disk_dict.get(constants.IDISK_SIZE, None)
10658         if size is None:
10659           raise errors.OpPrereqError("Required disk parameter size missing",
10660                                      errors.ECODE_INVAL)
10661         try:
10662           size = int(size)
10663         except (TypeError, ValueError), err:
10664           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10665                                      str(err), errors.ECODE_INVAL)
10666         disk_dict[constants.IDISK_SIZE] = size
10667       else:
10668         # modification of disk
10669         if constants.IDISK_SIZE in disk_dict:
10670           raise errors.OpPrereqError("Disk size change not possible, use"
10671                                      " grow-disk", errors.ECODE_INVAL)
10672
10673     if disk_addremove > 1:
10674       raise errors.OpPrereqError("Only one disk add or remove operation"
10675                                  " supported at a time", errors.ECODE_INVAL)
10676
10677     if self.op.disks and self.op.disk_template is not None:
10678       raise errors.OpPrereqError("Disk template conversion and other disk"
10679                                  " changes not supported at the same time",
10680                                  errors.ECODE_INVAL)
10681
10682     if (self.op.disk_template and
10683         self.op.disk_template in constants.DTS_INT_MIRROR and
10684         self.op.remote_node is None):
10685       raise errors.OpPrereqError("Changing the disk template to a mirrored"
10686                                  " one requires specifying a secondary node",
10687                                  errors.ECODE_INVAL)
10688
10689     # NIC validation
10690     nic_addremove = 0
10691     for nic_op, nic_dict in self.op.nics:
10692       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10693       if nic_op == constants.DDM_REMOVE:
10694         nic_addremove += 1
10695         continue
10696       elif nic_op == constants.DDM_ADD:
10697         nic_addremove += 1
10698       else:
10699         if not isinstance(nic_op, int):
10700           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10701         if not isinstance(nic_dict, dict):
10702           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10703           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10704
10705       # nic_dict should be a dict
10706       nic_ip = nic_dict.get(constants.INIC_IP, None)
10707       if nic_ip is not None:
10708         if nic_ip.lower() == constants.VALUE_NONE:
10709           nic_dict[constants.INIC_IP] = None
10710         else:
10711           if not netutils.IPAddress.IsValid(nic_ip):
10712             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10713                                        errors.ECODE_INVAL)
10714
10715       nic_bridge = nic_dict.get("bridge", None)
10716       nic_link = nic_dict.get(constants.INIC_LINK, None)
10717       if nic_bridge and nic_link:
10718         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10719                                    " at the same time", errors.ECODE_INVAL)
10720       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10721         nic_dict["bridge"] = None
10722       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10723         nic_dict[constants.INIC_LINK] = None
10724
10725       if nic_op == constants.DDM_ADD:
10726         nic_mac = nic_dict.get(constants.INIC_MAC, None)
10727         if nic_mac is None:
10728           nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10729
10730       if constants.INIC_MAC in nic_dict:
10731         nic_mac = nic_dict[constants.INIC_MAC]
10732         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10733           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10734
10735         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10736           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10737                                      " modifying an existing nic",
10738                                      errors.ECODE_INVAL)
10739
10740     if nic_addremove > 1:
10741       raise errors.OpPrereqError("Only one NIC add or remove operation"
10742                                  " supported at a time", errors.ECODE_INVAL)
10743
10744   def ExpandNames(self):
10745     self._ExpandAndLockInstance()
10746     self.needed_locks[locking.LEVEL_NODE] = []
10747     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10748
10749   def DeclareLocks(self, level):
10750     if level == locking.LEVEL_NODE:
10751       self._LockInstancesNodes()
10752       if self.op.disk_template and self.op.remote_node:
10753         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10754         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10755
10756   def BuildHooksEnv(self):
10757     """Build hooks env.
10758
10759     This runs on the master, primary and secondaries.
10760
10761     """
10762     args = dict()
10763     if constants.BE_MEMORY in self.be_new:
10764       args["memory"] = self.be_new[constants.BE_MEMORY]
10765     if constants.BE_VCPUS in self.be_new:
10766       args["vcpus"] = self.be_new[constants.BE_VCPUS]
10767     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10768     # information at all.
10769     if self.op.nics:
10770       args["nics"] = []
10771       nic_override = dict(self.op.nics)
10772       for idx, nic in enumerate(self.instance.nics):
10773         if idx in nic_override:
10774           this_nic_override = nic_override[idx]
10775         else:
10776           this_nic_override = {}
10777         if constants.INIC_IP in this_nic_override:
10778           ip = this_nic_override[constants.INIC_IP]
10779         else:
10780           ip = nic.ip
10781         if constants.INIC_MAC in this_nic_override:
10782           mac = this_nic_override[constants.INIC_MAC]
10783         else:
10784           mac = nic.mac
10785         if idx in self.nic_pnew:
10786           nicparams = self.nic_pnew[idx]
10787         else:
10788           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10789         mode = nicparams[constants.NIC_MODE]
10790         link = nicparams[constants.NIC_LINK]
10791         args["nics"].append((ip, mac, mode, link))
10792       if constants.DDM_ADD in nic_override:
10793         ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10794         mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10795         nicparams = self.nic_pnew[constants.DDM_ADD]
10796         mode = nicparams[constants.NIC_MODE]
10797         link = nicparams[constants.NIC_LINK]
10798         args["nics"].append((ip, mac, mode, link))
10799       elif constants.DDM_REMOVE in nic_override:
10800         del args["nics"][-1]
10801
10802     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10803     if self.op.disk_template:
10804       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10805
10806     return env
10807
10808   def BuildHooksNodes(self):
10809     """Build hooks nodes.
10810
10811     """
10812     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10813     return (nl, nl)
10814
10815   def CheckPrereq(self):
10816     """Check prerequisites.
10817
10818     This only checks the instance list against the existing names.
10819
10820     """
10821     # checking the new params on the primary/secondary nodes
10822
10823     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10824     cluster = self.cluster = self.cfg.GetClusterInfo()
10825     assert self.instance is not None, \
10826       "Cannot retrieve locked instance %s" % self.op.instance_name
10827     pnode = instance.primary_node
10828     nodelist = list(instance.all_nodes)
10829
10830     # OS change
10831     if self.op.os_name and not self.op.force:
10832       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10833                       self.op.force_variant)
10834       instance_os = self.op.os_name
10835     else:
10836       instance_os = instance.os
10837
10838     if self.op.disk_template:
10839       if instance.disk_template == self.op.disk_template:
10840         raise errors.OpPrereqError("Instance already has disk template %s" %
10841                                    instance.disk_template, errors.ECODE_INVAL)
10842
10843       if (instance.disk_template,
10844           self.op.disk_template) not in self._DISK_CONVERSIONS:
10845         raise errors.OpPrereqError("Unsupported disk template conversion from"
10846                                    " %s to %s" % (instance.disk_template,
10847                                                   self.op.disk_template),
10848                                    errors.ECODE_INVAL)
10849       _CheckInstanceDown(self, instance, "cannot change disk template")
10850       if self.op.disk_template in constants.DTS_INT_MIRROR:
10851         if self.op.remote_node == pnode:
10852           raise errors.OpPrereqError("Given new secondary node %s is the same"
10853                                      " as the primary node of the instance" %
10854                                      self.op.remote_node, errors.ECODE_STATE)
10855         _CheckNodeOnline(self, self.op.remote_node)
10856         _CheckNodeNotDrained(self, self.op.remote_node)
10857         # FIXME: here we assume that the old instance type is DT_PLAIN
10858         assert instance.disk_template == constants.DT_PLAIN
10859         disks = [{constants.IDISK_SIZE: d.size,
10860                   constants.IDISK_VG: d.logical_id[0]}
10861                  for d in instance.disks]
10862         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10863         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10864
10865     # hvparams processing
10866     if self.op.hvparams:
10867       hv_type = instance.hypervisor
10868       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10869       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10870       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10871
10872       # local check
10873       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10874       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10875       self.hv_new = hv_new # the new actual values
10876       self.hv_inst = i_hvdict # the new dict (without defaults)
10877     else:
10878       self.hv_new = self.hv_inst = {}
10879
10880     # beparams processing
10881     if self.op.beparams:
10882       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10883                                    use_none=True)
10884       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10885       be_new = cluster.SimpleFillBE(i_bedict)
10886       self.be_new = be_new # the new actual values
10887       self.be_inst = i_bedict # the new dict (without defaults)
10888     else:
10889       self.be_new = self.be_inst = {}
10890     be_old = cluster.FillBE(instance)
10891
10892     # osparams processing
10893     if self.op.osparams:
10894       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10895       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10896       self.os_inst = i_osdict # the new dict (without defaults)
10897     else:
10898       self.os_inst = {}
10899
10900     self.warn = []
10901
10902     if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10903         be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10904       mem_check_list = [pnode]
10905       if be_new[constants.BE_AUTO_BALANCE]:
10906         # either we changed auto_balance to yes or it was from before
10907         mem_check_list.extend(instance.secondary_nodes)
10908       instance_info = self.rpc.call_instance_info(pnode, instance.name,
10909                                                   instance.hypervisor)
10910       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10911                                          instance.hypervisor)
10912       pninfo = nodeinfo[pnode]
10913       msg = pninfo.fail_msg
10914       if msg:
10915         # Assume the primary node is unreachable and go ahead
10916         self.warn.append("Can't get info from primary node %s: %s" %
10917                          (pnode, msg))
10918       elif not isinstance(pninfo.payload.get("memory_free", None), int):
10919         self.warn.append("Node data from primary node %s doesn't contain"
10920                          " free memory information" % pnode)
10921       elif instance_info.fail_msg:
10922         self.warn.append("Can't get instance runtime information: %s" %
10923                         instance_info.fail_msg)
10924       else:
10925         if instance_info.payload:
10926           current_mem = int(instance_info.payload["memory"])
10927         else:
10928           # Assume instance not running
10929           # (there is a slight race condition here, but it's not very probable,
10930           # and we have no other way to check)
10931           current_mem = 0
10932         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10933                     pninfo.payload["memory_free"])
10934         if miss_mem > 0:
10935           raise errors.OpPrereqError("This change will prevent the instance"
10936                                      " from starting, due to %d MB of memory"
10937                                      " missing on its primary node" % miss_mem,
10938                                      errors.ECODE_NORES)
10939
10940       if be_new[constants.BE_AUTO_BALANCE]:
10941         for node, nres in nodeinfo.items():
10942           if node not in instance.secondary_nodes:
10943             continue
10944           nres.Raise("Can't get info from secondary node %s" % node,
10945                      prereq=True, ecode=errors.ECODE_STATE)
10946           if not isinstance(nres.payload.get("memory_free", None), int):
10947             raise errors.OpPrereqError("Secondary node %s didn't return free"
10948                                        " memory information" % node,
10949                                        errors.ECODE_STATE)
10950           elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10951             raise errors.OpPrereqError("This change will prevent the instance"
10952                                        " from failover to its secondary node"
10953                                        " %s, due to not enough memory" % node,
10954                                        errors.ECODE_STATE)
10955
10956     # NIC processing
10957     self.nic_pnew = {}
10958     self.nic_pinst = {}
10959     for nic_op, nic_dict in self.op.nics:
10960       if nic_op == constants.DDM_REMOVE:
10961         if not instance.nics:
10962           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10963                                      errors.ECODE_INVAL)
10964         continue
10965       if nic_op != constants.DDM_ADD:
10966         # an existing nic
10967         if not instance.nics:
10968           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10969                                      " no NICs" % nic_op,
10970                                      errors.ECODE_INVAL)
10971         if nic_op < 0 or nic_op >= len(instance.nics):
10972           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10973                                      " are 0 to %d" %
10974                                      (nic_op, len(instance.nics) - 1),
10975                                      errors.ECODE_INVAL)
10976         old_nic_params = instance.nics[nic_op].nicparams
10977         old_nic_ip = instance.nics[nic_op].ip
10978       else:
10979         old_nic_params = {}
10980         old_nic_ip = None
10981
10982       update_params_dict = dict([(key, nic_dict[key])
10983                                  for key in constants.NICS_PARAMETERS
10984                                  if key in nic_dict])
10985
10986       if "bridge" in nic_dict:
10987         update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10988
10989       new_nic_params = _GetUpdatedParams(old_nic_params,
10990                                          update_params_dict)
10991       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10992       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10993       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10994       self.nic_pinst[nic_op] = new_nic_params
10995       self.nic_pnew[nic_op] = new_filled_nic_params
10996       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10997
10998       if new_nic_mode == constants.NIC_MODE_BRIDGED:
10999         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11000         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11001         if msg:
11002           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11003           if self.op.force:
11004             self.warn.append(msg)
11005           else:
11006             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11007       if new_nic_mode == constants.NIC_MODE_ROUTED:
11008         if constants.INIC_IP in nic_dict:
11009           nic_ip = nic_dict[constants.INIC_IP]
11010         else:
11011           nic_ip = old_nic_ip
11012         if nic_ip is None:
11013           raise errors.OpPrereqError("Cannot set the nic ip to None"
11014                                      " on a routed nic", errors.ECODE_INVAL)
11015       if constants.INIC_MAC in nic_dict:
11016         nic_mac = nic_dict[constants.INIC_MAC]
11017         if nic_mac is None:
11018           raise errors.OpPrereqError("Cannot set the nic mac to None",
11019                                      errors.ECODE_INVAL)
11020         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11021           # otherwise generate the mac
11022           nic_dict[constants.INIC_MAC] = \
11023             self.cfg.GenerateMAC(self.proc.GetECId())
11024         else:
11025           # or validate/reserve the current one
11026           try:
11027             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11028           except errors.ReservationError:
11029             raise errors.OpPrereqError("MAC address %s already in use"
11030                                        " in cluster" % nic_mac,
11031                                        errors.ECODE_NOTUNIQUE)
11032
11033     # DISK processing
11034     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11035       raise errors.OpPrereqError("Disk operations not supported for"
11036                                  " diskless instances",
11037                                  errors.ECODE_INVAL)
11038     for disk_op, _ in self.op.disks:
11039       if disk_op == constants.DDM_REMOVE:
11040         if len(instance.disks) == 1:
11041           raise errors.OpPrereqError("Cannot remove the last disk of"
11042                                      " an instance", errors.ECODE_INVAL)
11043         _CheckInstanceDown(self, instance, "cannot remove disks")
11044
11045       if (disk_op == constants.DDM_ADD and
11046           len(instance.disks) >= constants.MAX_DISKS):
11047         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11048                                    " add more" % constants.MAX_DISKS,
11049                                    errors.ECODE_STATE)
11050       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11051         # an existing disk
11052         if disk_op < 0 or disk_op >= len(instance.disks):
11053           raise errors.OpPrereqError("Invalid disk index %s, valid values"
11054                                      " are 0 to %d" %
11055                                      (disk_op, len(instance.disks)),
11056                                      errors.ECODE_INVAL)
11057
11058     return
11059
11060   def _ConvertPlainToDrbd(self, feedback_fn):
11061     """Converts an instance from plain to drbd.
11062
11063     """
11064     feedback_fn("Converting template to drbd")
11065     instance = self.instance
11066     pnode = instance.primary_node
11067     snode = self.op.remote_node
11068
11069     # create a fake disk info for _GenerateDiskTemplate
11070     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11071                   constants.IDISK_VG: d.logical_id[0]}
11072                  for d in instance.disks]
11073     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11074                                       instance.name, pnode, [snode],
11075                                       disk_info, None, None, 0, feedback_fn)
11076     info = _GetInstanceInfoText(instance)
11077     feedback_fn("Creating aditional volumes...")
11078     # first, create the missing data and meta devices
11079     for disk in new_disks:
11080       # unfortunately this is... not too nice
11081       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11082                             info, True)
11083       for child in disk.children:
11084         _CreateSingleBlockDev(self, snode, instance, child, info, True)
11085     # at this stage, all new LVs have been created, we can rename the
11086     # old ones
11087     feedback_fn("Renaming original volumes...")
11088     rename_list = [(o, n.children[0].logical_id)
11089                    for (o, n) in zip(instance.disks, new_disks)]
11090     result = self.rpc.call_blockdev_rename(pnode, rename_list)
11091     result.Raise("Failed to rename original LVs")
11092
11093     feedback_fn("Initializing DRBD devices...")
11094     # all child devices are in place, we can now create the DRBD devices
11095     for disk in new_disks:
11096       for node in [pnode, snode]:
11097         f_create = node == pnode
11098         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11099
11100     # at this point, the instance has been modified
11101     instance.disk_template = constants.DT_DRBD8
11102     instance.disks = new_disks
11103     self.cfg.Update(instance, feedback_fn)
11104
11105     # disks are created, waiting for sync
11106     disk_abort = not _WaitForSync(self, instance,
11107                                   oneshot=not self.op.wait_for_sync)
11108     if disk_abort:
11109       raise errors.OpExecError("There are some degraded disks for"
11110                                " this instance, please cleanup manually")
11111
11112   def _ConvertDrbdToPlain(self, feedback_fn):
11113     """Converts an instance from drbd to plain.
11114
11115     """
11116     instance = self.instance
11117     assert len(instance.secondary_nodes) == 1
11118     pnode = instance.primary_node
11119     snode = instance.secondary_nodes[0]
11120     feedback_fn("Converting template to plain")
11121
11122     old_disks = instance.disks
11123     new_disks = [d.children[0] for d in old_disks]
11124
11125     # copy over size and mode
11126     for parent, child in zip(old_disks, new_disks):
11127       child.size = parent.size
11128       child.mode = parent.mode
11129
11130     # update instance structure
11131     instance.disks = new_disks
11132     instance.disk_template = constants.DT_PLAIN
11133     self.cfg.Update(instance, feedback_fn)
11134
11135     feedback_fn("Removing volumes on the secondary node...")
11136     for disk in old_disks:
11137       self.cfg.SetDiskID(disk, snode)
11138       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11139       if msg:
11140         self.LogWarning("Could not remove block device %s on node %s,"
11141                         " continuing anyway: %s", disk.iv_name, snode, msg)
11142
11143     feedback_fn("Removing unneeded volumes on the primary node...")
11144     for idx, disk in enumerate(old_disks):
11145       meta = disk.children[1]
11146       self.cfg.SetDiskID(meta, pnode)
11147       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11148       if msg:
11149         self.LogWarning("Could not remove metadata for disk %d on node %s,"
11150                         " continuing anyway: %s", idx, pnode, msg)
11151
11152     # this is a DRBD disk, return its port to the pool
11153     for disk in old_disks:
11154       tcp_port = disk.logical_id[2]
11155       self.cfg.AddTcpUdpPort(tcp_port)
11156
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    The changes are applied in this order: disk additions/removals/mode
    changes, disk template conversion, NIC changes, hypervisor
    parameters, backend parameters, OS name and OS parameters. The
    instance object is written to the configuration once at the end.

    @param feedback_fn: function used to send feedback back to the caller
    @rtype: list
    @return: list of (description, value) tuples, one for each change
        that was applied
    @raise errors.OpExecError: if the instance disks cannot be shut down
        before a disk template conversion

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        # after the pop, the list length equals the index of the removed disk
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            # removal failures are not fatal, the disk is dropped from the
            # instance anyway
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))

        # if this is a DRBD disk, return its port to the pool
        if device.dev_type in constants.LDS_DRBD:
          tcp_port = device.logical_id[2]
          self.cfg.AddTcpUdpPort(tcp_port)
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template in (constants.DT_FILE,
                                        constants.DT_SHARED_FILE):
          # file-based templates: reuse the driver and the directory of the
          # first disk for the new one
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        # the new disk gets the next free index
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base, feedback_fn)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          # the flag is true only for the primary node and is passed twice to
          # _CreateBlockDev (presumably as force-create and force-open;
          # confirm against the helper's signature)
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            # best effort: a failed creation is only reported as a warning
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk; disk_op is the index of an existing disk and
        # only its access mode is updated
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
        result.append(("disk.mode/%d" % disk_op,
                       disk_dict[constants.IDISK_MODE]))

    # disk template conversion
    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      # the conversion function is looked up by (current, requested) template
      mode = (instance.disk_template, self.op.disk_template)
      try:
        # the table holds plain functions, hence the explicit "self"
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        # whatever went wrong, release the DRBD minors of this instance
        # before propagating the original exception
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        # after the deletion, the list length equals the removed NIC's index
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict[constants.INIC_MAC]
        ip = nic_dict.get(constants.INIC_IP, None)
        # the parameters computed in CheckPrereq are stored under the
        # operation key
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        # modify an existing NIC; nic_op is its index
        for key in (constants.INIC_MAC, constants.INIC_IP):
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    # write all accumulated changes of the instance object to the
    # configuration
    self.cfg.Update(instance, feedback_fn)

    return result
11295
  # Supported disk template conversions: maps a tuple of
  # (current disk template, requested disk template) to the function doing
  # the conversion. The values are plain functions (not bound methods), so
  # they are called as "self._DISK_CONVERSIONS[mode](self, feedback_fn)".
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
11300
11301
class LUInstanceChangeGroup(LogicalUnit):
  """Logical unit for changing the node group(s) of an instance.

  The placement is not computed here: the iallocator is run in
  "change group" mode and its answer is converted into jobs, which are
  returned to the caller.

  """
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Prepare the lock declarations and look up the requested groups.

    All locks are acquired in shared mode. The node group and node locks
    are filled in later by L{DeclareLocks}.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      # no explicit request, every other group is a candidate
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    """Compute the node group and node locks.

    @param level: the locking level for which locks are being declared

    """
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    This verifies the optimistically acquired locks and computes the
    final set of target groups, stored in C{self.target_uuids}.

    @raise errors.OpPrereqError: if a requested target group is already
        used by the instance or if no target group is left

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups; the requested UUIDs are kept
      # in a list, but the set operation below needs a set on both sides
      # ("list & frozenset" raises TypeError), so convert them here
      self.target_uuids = frozenset(self.req_target_uuids)
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: the instance hook environment, plus C{TARGET_GROUPS} holding
        the space-separated target groups

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: a pair of node lists, both containing only the master node

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Run the iallocator and return the jobs computed from its answer.

    @param feedback_fn: function used to send feedback back to the caller
        (not used by this method)
    @return: a L{ResultWithJobs} object wrapping the jobs
    @raise errors.OpPrereqError: if the iallocator run is not successful

    """
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=list(self.target_uuids))

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)
11441
11442
class LUBackupQuery(NoHooksLU):
  """Logical unit returning the exports present on a set of nodes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare shared node locks for the requested (or all) nodes.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      wanted_nodes = _GetWantedNodes(self, self.op.nodes)
    else:
      # no explicit node list means all nodes are queried
      wanted_nodes = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted_nodes

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary mapping each node to the payload of its export
        list call, or to C{False} if the call to that node failed

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    exports_by_node = {}
    for (node_name, nres) in self.rpc.call_export_list(self.nodes).items():
      if nres.fail_msg:
        node_exports = False
      else:
        node_exports = nres.payload
      exports_by_node[node_name] = node_exports

    return exports_by_node
11477
11478
class LUBackupPrepare(NoHooksLU):
  """Logical unit gathering the data needed before exporting an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance given in the opcode.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance object is loaded, its primary node is checked to be
    online and the cluster domain secret is read.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    @param feedback_fn: function used to send feedback back to the caller
    @return: for remote exports a dictionary with the keys C{handshake},
        C{x509_key_name} and C{x509_ca}; C{None} for any other mode

    """
    inst = self.instance

    if self.op.mode != constants.EXPORT_MODE_REMOTE:
      # nothing to prepare for other export modes
      return None

    hmac_salt = utils.GenerateSecret(8)

    feedback_fn("Generating X509 certificate on %s" % inst.primary_node)
    cert_result = self.rpc.call_x509_cert_create(inst.primary_node,
                                                 constants.RIE_CERT_VALIDITY)
    cert_result.Raise("Can't create X509 key and certificate on %s" %
                      cert_result.node)

    (key_name, pem_data) = cert_result.payload

    x509_cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                                pem_data)

    # everything handed out is derived from the cluster domain secret
    handshake = masterd.instance.ComputeRemoteExportHandshake(self._cds)
    key_name_hmac = utils.Sha1Hmac(self._cds, key_name, salt=hmac_salt)
    signed_ca = utils.SignX509Certificate(x509_cert, self._cds, hmac_salt)

    return {
      "handshake": handshake,
      "x509_key_name": (key_name, key_name_hmac, hmac_salt),
      "x509_ca": signed_ca,
      }
11528
11529
11530 class LUBackupExport(LogicalUnit):
11531   """Export an instance to an image in the cluster.
11532
11533   """
11534   HPATH = "instance-export"
11535   HTYPE = constants.HTYPE_INSTANCE
11536   REQ_BGL = False
11537
11538   def CheckArguments(self):
11539     """Check the arguments.
11540
11541     """
11542     self.x509_key_name = self.op.x509_key_name
11543     self.dest_x509_ca_pem = self.op.destination_x509_ca
11544
11545     if self.op.mode == constants.EXPORT_MODE_REMOTE:
11546       if not self.x509_key_name:
11547         raise errors.OpPrereqError("Missing X509 key name for encryption",
11548                                    errors.ECODE_INVAL)
11549
11550       if not self.dest_x509_ca_pem:
11551         raise errors.OpPrereqError("Missing destination X509 CA",
11552                                    errors.ECODE_INVAL)
11553
11554   def ExpandNames(self):
11555     self._ExpandAndLockInstance()
11556
11557     # Lock all nodes for local exports
11558     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11559       # FIXME: lock only instance primary and destination node
11560       #
11561       # Sad but true, for now we have do lock all nodes, as we don't know where
11562       # the previous export might be, and in this LU we search for it and
11563       # remove it from its current node. In the future we could fix this by:
11564       #  - making a tasklet to search (share-lock all), then create the
11565       #    new one, then one to remove, after
11566       #  - removing the removal operation altogether
11567       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11568
  def DeclareLocks(self, level):
    """Last minute lock declaration.

    Intentionally empty: no locks are added at this stage.

    @param level: the locking level being processed (unused)

    """
    # All nodes are locked anyway, so nothing to do here.
11572
11573   def BuildHooksEnv(self):
11574     """Build hooks env.
11575
11576     This will run on the master, primary node and target node.
11577
11578     """
11579     env = {
11580       "EXPORT_MODE": self.op.mode,
11581       "EXPORT_NODE": self.op.target_node,
11582       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11583       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11584       # TODO: Generic function for boolean env variables
11585       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11586       }
11587
11588     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11589
11590     return env
11591
11592   def BuildHooksNodes(self):
11593     """Build hooks nodes.
11594
11595     """
11596     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11597
11598     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11599       nl.append(self.op.target_node)
11600
11601     return (nl, nl)
11602
11603   def CheckPrereq(self):
11604     """Check prerequisites.
11605
11606     This checks that the instance and node names are valid.
11607
11608     """
11609     instance_name = self.op.instance_name
11610
11611     self.instance = self.cfg.GetInstanceInfo(instance_name)
11612     assert self.instance is not None, \
11613           "Cannot retrieve locked instance %s" % self.op.instance_name
11614     _CheckNodeOnline(self, self.instance.primary_node)
11615
11616     if (self.op.remove_instance and self.instance.admin_up and
11617         not self.op.shutdown):
11618       raise errors.OpPrereqError("Can not remove instance without shutting it"
11619                                  " down before")
11620
11621     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11622       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11623       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11624       assert self.dst_node is not None
11625
11626       _CheckNodeOnline(self, self.dst_node.name)
11627       _CheckNodeNotDrained(self, self.dst_node.name)
11628
11629       self._cds = None
11630       self.dest_disk_info = None
11631       self.dest_x509_ca = None
11632
11633     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11634       self.dst_node = None
11635
11636       if len(self.op.target_node) != len(self.instance.disks):
11637         raise errors.OpPrereqError(("Received destination information for %s"
11638                                     " disks, but instance %s has %s disks") %
11639                                    (len(self.op.target_node), instance_name,
11640                                     len(self.instance.disks)),
11641                                    errors.ECODE_INVAL)
11642
11643       cds = _GetClusterDomainSecret()
11644
11645       # Check X509 key name
11646       try:
11647         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11648       except (TypeError, ValueError), err:
11649         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11650
11651       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11652         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11653                                    errors.ECODE_INVAL)
11654
11655       # Load and verify CA
11656       try:
11657         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11658       except OpenSSL.crypto.Error, err:
11659         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11660                                    (err, ), errors.ECODE_INVAL)
11661
11662       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11663       if errcode is not None:
11664         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11665                                    (msg, ), errors.ECODE_INVAL)
11666
11667       self.dest_x509_ca = cert
11668
11669       # Verify target information
11670       disk_info = []
11671       for idx, disk_data in enumerate(self.op.target_node):
11672         try:
11673           (host, port, magic) = \
11674             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11675         except errors.GenericError, err:
11676           raise errors.OpPrereqError("Target info for disk %s: %s" %
11677                                      (idx, err), errors.ECODE_INVAL)
11678
11679         disk_info.append((host, port, magic))
11680
11681       assert len(disk_info) == len(self.op.target_node)
11682       self.dest_disk_info = disk_info
11683
11684     else:
11685       raise errors.ProgrammerError("Unhandled export mode %r" %
11686                                    self.op.mode)
11687
11688     # instance disk type verification
11689     # TODO: Implement export support for file-based disks
11690     for disk in self.instance.disks:
11691       if disk.dev_type == constants.LD_FILE:
11692         raise errors.OpPrereqError("Export not supported for instances with"
11693                                    " file-based disks", errors.ECODE_INVAL)
11694
11695   def _CleanupExports(self, feedback_fn):
11696     """Removes exports of current instance from all other nodes.
11697
11698     If an instance in a cluster with nodes A..D was exported to node C, its
11699     exports will be removed from the nodes A, B and D.
11700
11701     """
11702     assert self.op.mode != constants.EXPORT_MODE_REMOTE
11703
11704     nodelist = self.cfg.GetNodeList()
11705     nodelist.remove(self.dst_node.name)
11706
11707     # on one-node clusters nodelist will be empty after the removal
11708     # if we proceed the backup would be removed because OpBackupQuery
11709     # substitutes an empty list with the full cluster node list.
11710     iname = self.instance.name
11711     if nodelist:
11712       feedback_fn("Removing old exports for instance %s" % iname)
11713       exportlist = self.rpc.call_export_list(nodelist)
11714       for node in exportlist:
11715         if exportlist[node].fail_msg:
11716           continue
11717         if iname in exportlist[node].payload:
11718           msg = self.rpc.call_export_remove(node, iname).fail_msg
11719           if msg:
11720             self.LogWarning("Could not remove older export for instance %s"
11721                             " on node %s: %s", iname, node, msg)
11722
11723   def Exec(self, feedback_fn):
11724     """Export an instance to an image in the cluster.
11725
11726     """
11727     assert self.op.mode in constants.EXPORT_MODES
11728
11729     instance = self.instance
11730     src_node = instance.primary_node
11731
11732     if self.op.shutdown:
11733       # shutdown the instance, but not the disks
11734       feedback_fn("Shutting down instance %s" % instance.name)
11735       result = self.rpc.call_instance_shutdown(src_node, instance,
11736                                                self.op.shutdown_timeout)
11737       # TODO: Maybe ignore failures if ignore_remove_failures is set
11738       result.Raise("Could not shutdown instance %s on"
11739                    " node %s" % (instance.name, src_node))
11740
11741     # set the disks ID correctly since call_instance_start needs the
11742     # correct drbd minor to create the symlinks
11743     for disk in instance.disks:
11744       self.cfg.SetDiskID(disk, src_node)
11745
11746     activate_disks = (not instance.admin_up)
11747
11748     if activate_disks:
11749       # Activate the instance disks if we'exporting a stopped instance
11750       feedback_fn("Activating disks for %s" % instance.name)
11751       _StartInstanceDisks(self, instance, None)
11752
11753     try:
11754       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11755                                                      instance)
11756
11757       helper.CreateSnapshots()
11758       try:
11759         if (self.op.shutdown and instance.admin_up and
11760             not self.op.remove_instance):
11761           assert not activate_disks
11762           feedback_fn("Starting instance %s" % instance.name)
11763           result = self.rpc.call_instance_start(src_node, instance,
11764                                                 None, None, False)
11765           msg = result.fail_msg
11766           if msg:
11767             feedback_fn("Failed to start instance: %s" % msg)
11768             _ShutdownInstanceDisks(self, instance)
11769             raise errors.OpExecError("Could not start instance: %s" % msg)
11770
11771         if self.op.mode == constants.EXPORT_MODE_LOCAL:
11772           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11773         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11774           connect_timeout = constants.RIE_CONNECT_TIMEOUT
11775           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11776
11777           (key_name, _, _) = self.x509_key_name
11778
11779           dest_ca_pem = \
11780             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11781                                             self.dest_x509_ca)
11782
11783           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11784                                                      key_name, dest_ca_pem,
11785                                                      timeouts)
11786       finally:
11787         helper.Cleanup()
11788
11789       # Check for backwards compatibility
11790       assert len(dresults) == len(instance.disks)
11791       assert compat.all(isinstance(i, bool) for i in dresults), \
11792              "Not all results are boolean: %r" % dresults
11793
11794     finally:
11795       if activate_disks:
11796         feedback_fn("Deactivating disks for %s" % instance.name)
11797         _ShutdownInstanceDisks(self, instance)
11798
11799     if not (compat.all(dresults) and fin_resu):
11800       failures = []
11801       if not fin_resu:
11802         failures.append("export finalization")
11803       if not compat.all(dresults):
11804         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11805                                if not dsk)
11806         failures.append("disk export: disk(s) %s" % fdsk)
11807
11808       raise errors.OpExecError("Export failed, errors in %s" %
11809                                utils.CommaJoin(failures))
11810
11811     # At this point, the export was successful, we can cleanup/finish
11812
11813     # Remove instance if requested
11814     if self.op.remove_instance:
11815       feedback_fn("Removing instance %s" % instance.name)
11816       _RemoveInstance(self, feedback_fn, instance,
11817                       self.op.ignore_remove_failures)
11818
11819     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11820       self._CleanupExports(feedback_fn)
11821
11822     return fin_resu, dresults
11823
11824
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  All locked nodes are queried for their exports and every export found
  under the instance's name is removed.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the needed locks.

    All nodes have to be locked in order for the export removal to work.
    The instance itself isn't locked, as nothing will happen to it; this
    also allows removing exports of an already removed instance.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    """Remove any export.

    Failures to query a node are reported as warnings and failures to remove
    an export are logged; neither aborts the operation.

    @param feedback_fn: function used to send feedback back to the caller

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)

    # If the instance was not found we'll try with the name that was passed
    # in. This will only work if it was an FQDN, though.
    name_unknown = not instance_name
    if name_unknown:
      instance_name = self.op.instance_name

    node_exports = \
      self.rpc.call_export_list(self.owned_locks(locking.LEVEL_NODE))

    found = False
    for node in node_exports:
      node_res = node_exports[node]

      query_err = node_res.fail_msg
      if query_err:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        node, query_err)
        continue

      if instance_name not in node_res.payload:
        continue

      found = True
      remove_err = self.rpc.call_export_remove(node, instance_name).fail_msg
      if remove_err:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", instance_name, node, remove_err)

    if name_unknown and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
11870
11871
class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Generate the UUID of the new group and request its lock.

    The UUID is needed this early so that the corresponding lock can be
    created and acquired. Exec() later tells cfg.AddNodeGroup not to check
    whether the UUID exists in the configuration.

    """
    ec_id = self.proc.GetECId()
    self.group_uuid = self.cfg.GenerateUniqueID(ec_id)
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already and, if node parameters were given, enforces their types.

    @raise errors.OpPrereqError: if a group with the given name exists

    """
    wanted_name = self.op.group_name

    # LookupNodeGroup raising OpPrereqError means the name is still free
    try:
      existing_uuid = self.cfg.LookupNodeGroup(wanted_name)
      name_taken = True
    except errors.OpPrereqError:
      name_taken = False

    if name_taken:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (wanted_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    ndparams = self.op.ndparams
    if ndparams:
      utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: environment containing only the name of the new group

    """
    hooks_env = {}
    hooks_env["GROUP_NAME"] = self.op.group_name
    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: two separate lists, each containing only the master node

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    @param feedback_fn: function used to send feedback back to the caller

    """
    op = self.op
    new_group = objects.NodeGroup(name=op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=op.alloc_policy,
                                  ndparams=op.ndparams)

    # The UUID was generated in ExpandNames, hence no need to check it again
    self.cfg.AddNodeGroup(new_group, self.proc.GetECId(), check_uuid=False)

    # NOTE(review): presumably the lock was registered for removal when it
    # was added, so that a failed operation would not leave it behind;
    # confirm against the lock handling code
    del self.remove_locks[locking.LEVEL_NODEGROUP]
11934
11935
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve group and node names and declare the known locks.

    All affected nodes and groups are to be locked. The list of nodes and
    the *destination* group are readily available here; the "source" groups
    require node information and are added in DeclareLocks().

    """
    # Both lookups raise errors.OpPrereqError on their own
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    group_locks = set([self.group_uuid])

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: group_locks,
      locking.LEVEL_NODE: self.op.nodes,
      }

  def DeclareLocks(self, level):
    """Add the current groups of the affected nodes to the group locks.

    @param level: the locking level being declared; only the node group
      level is acted upon

    """
    if level != locking.LEVEL_NODEGROUP:
      return

    group_locks = self.needed_locks[locking.LEVEL_NODEGROUP]
    assert len(group_locks) == 1

    # The groups are looked up without holding the group or node locks yet,
    # which needs verification later in the code flow
    group_locks.update(self.cfg.GetNodeGroupsFromNodes(self.op.nodes))

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the locked groups still match the groups of the affected
    nodes and checks whether the assignment would split instances.

    @raise errors.OpExecError: if nodes changed groups since the locks were
      acquired, if the target group can't be retrieved, or if instances
      would get split and the operation isn't forced

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    # Groups as seen in the configuration now, with the locks held
    current_groups = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    locked_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    if locked_groups != current_groups:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(current_groups),
                                utils.CommaJoin(locked_groups)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    assignments = [(node, self.group_uuid) for node in self.op.nodes]
    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances(assignments, self.node_data,
                                            instance_data)

    if not new_splits:
      return

    fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

    if not self.op.force:
      raise errors.OpExecError("The following instances get split by this"
                               " change and --force was not given: %s" %
                               fmt_new_splits)

    self.LogWarning("This operation will split the following instances: %s",
                    fmt_new_splits)

    if previous_splits:
      self.LogWarning("In addition, these already-split instances continue"
                      " to be split across groups: %s",
                      utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    @param feedback_fn: function used to send feedback back to the caller

    """
    mods = []
    for node_name in self.op.nodes:
      mods.append((node_name, self.group_uuid))

    self.cfg.AssignGroupNodes(mods)

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    An instance counts as split if its primary and secondary nodes belong to
    more than one group. Only instances whose disk template is listed in
    constants.DTS_INT_MIRROR are considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
    # Only assignments which actually move a node to another group matter
    moved = {}
    for (node, group) in changes:
      if node_data[node].group != group:
        moved[node] = group

    def _SpansGroups(nodes, overrides):
      """Tell whether the nodes are in more than one group.

      Groups found in C{overrides} take precedence over the current ones.

      """
      groups = set()
      for node in nodes:
        groups.add(overrides.get(node, node_data[node].group))
      return len(groups) > 1

    split_before = set()
    split_after = set()

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      inst_nodes = [inst.primary_node]
      inst_nodes.extend(inst.secondary_nodes)

      if _SpansGroups(inst_nodes, {}):
        split_before.add(inst.name)

      if _SpansGroups(inst_nodes, moved):
        split_after.add(inst.name)

    newly_split = split_after - split_before
    still_split = split_before & split_after

    return (list(newly_split), list(still_split))
12067
12068
class _GroupQuery(_QueryBase):
  """Query implementation for node groups.

  """
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    """Compute the UUIDs of the wanted groups.

    No locks are declared. Without names all groups are wanted, ordered by
    name through C{utils.NiceSort}; otherwise every given name can be either
    a group UUID or a group name.

    @param lu: the logical unit on whose behalf the query runs
    @raise errors.OpPrereqError: if some of the given names match neither a
      group UUID nor a group name

    """
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()

    name_to_uuid = {}
    for group in self._all_groups.values():
      name_to_uuid[group.name] = group.uuid

    if not self.names:
      sorted_names = utils.NiceSort(name_to_uuid.keys())
      self.wanted = [name_to_uuid[name] for name in sorted_names]
      return

    # Accept names to be either names or UUIDs.
    known_uuids = frozenset(self._all_groups.keys())
    unknown = []
    wanted = self.wanted = []

    for name in self.names:
      if name in known_uuids:
        wanted.append(name)
      elif name in name_to_uuid:
        wanted.append(name_to_uuid[name])
      else:
        unknown.append(name)

    if unknown:
      raise errors.OpPrereqError("Some groups do not exist: %s" %
                                 utils.CommaJoin(unknown),
                                 errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    """Declare locks; nothing is done here.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    @param lu: the logical unit on whose behalf the query runs
    @return: a C{query.GroupQueryData} object built from the wanted groups
      and, if requested, the group to nodes and group to instances mappings

    """
    want_nodes = query.GQ_NODE in self.requested_data
    want_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # GQ_NODE needs a group->[nodes] mapping and GQ_INST a group->[instances]
    # one. The former can be built from GetAllNodesInfo() alone, whereas the
    # latter has to go through instance->node as well. Hence nodes are
    # processed even if only instance information was requested.
    if want_nodes or want_instances:
      group_to_nodes = {}
      for uuid in self.wanted:
        group_to_nodes[uuid] = []

      node_to_group = {}

      for node in lu.cfg.GetAllNodesInfo().values():
        if node.group not in group_to_nodes:
          continue
        group_to_nodes[node.group].append(node.name)
        node_to_group[node.name] = node.group

      if want_instances:
        group_to_instances = {}
        for uuid in self.wanted:
          group_to_instances[uuid] = []

        # An instance is listed under the group of its primary node
        for inst in lu.cfg.GetAllInstancesInfo().values():
          pnode = inst.primary_node
          if pnode in node_to_group:
            group_to_instances[node_to_group[pnode]].append(inst.name)

        if not want_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    wanted_groups = [self._all_groups[uuid] for uuid in self.wanted]

    return query.GroupQueryData(wanted_groups, group_to_nodes,
                                group_to_instances)
12144
12145
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  All the work is delegated to a L{_GroupQuery} object.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query object from the opcode's names and output fields.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.gq = _GroupQuery(name_filter, self.op.output_fields, False)

  def ExpandNames(self):
    """Let the query object expand the names.

    """
    query_obj = self.gq
    query_obj.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the query object declare the locks for the given level.

    """
    query_obj = self.gq
    query_obj.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query.

    @param feedback_fn: function used to send feedback back to the caller
    @return: whatever the query object's C{OldStyleQuery} returns

    """
    query_obj = self.gq
    return query_obj.OldStyleQuery(self)
12164
12165
class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  The node parameters and the allocation policy of a group can be changed.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    """Check that at least one modification was requested.

    @raise errors.OpPrereqError: if all supported modifications are None

    """
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Resolve the group name and request the lock of the group.

    """
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Retrieves the group object and, if node parameters were given, computes
    and type-checks the new node parameters of the group.

    @raise errors.OpExecError: if the group can't be retrieved

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      # Apply the type enforcement to the merged parameters, as this is the
      # dict which Exec stores in the group. Enforcing the types on the
      # opcode's dict only does not cover the values ending up in the
      # configuration (assuming _GetUpdatedParams returns a separate dict;
      # TODO confirm).
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: environment with the group name and the new allocation policy

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: two separate lists, each containing only the master node

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    @param feedback_fn: function used to send feedback back to the caller
    @rtype: list
    @return: list of (parameter name, new value) pairs; only a change of
      the node parameters is reported

    """
    result = []

    if self.op.ndparams:
      # Computed and type-checked in CheckPrereq
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result
12238
12239
class LUGroupRemove(LogicalUnit):
  """Logical unit for removing a node group.

  """
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the group name and request the lock of the group.

    """
    # The lookup raises errors.OpPrereqError on its own
    group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.group_uuid = group_uuid
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that is
    empty (i.e., contains no nodes), and that is not the last group of the
    cluster.

    @raise errors.OpPrereqError: if the group still has nodes or is the only
      group of the cluster

    """
    # Verify that the group is empty.
    members = []
    for node in self.cfg.GetAllNodesInfo().values():
      if node.group == self.group_uuid:
        members.append(node.name)

    if members:
      fmt_members = utils.CommaJoin(utils.NiceSort(members))
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name, fmt_members),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    group_count = len(self.cfg.GetNodeGroupList())
    if group_count == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: environment containing only the name of the group

    """
    hooks_env = {}
    hooks_env["GROUP_NAME"] = self.op.group_name
    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: two separate lists, each containing only the master node

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Remove the node group.

    @param feedback_fn: function used to send feedback back to the caller
    @raise errors.OpExecError: if the configuration reports an error while
      removing the group

    """
    group_uuid = self.group_uuid

    try:
      self.cfg.RemoveNodeGroup(group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, group_uuid))

    # The group is gone, hence its lock is to be removed as well
    self.remove_locks[locking.LEVEL_NODEGROUP] = group_uuid
12305
12306
class LUGroupRename(LogicalUnit):
  """Logical unit for renaming a node group.

  """
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the group name and request the lock of the group.

    """
    # The lookup raises errors.OpPrereqError on its own
    group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.group_uuid = group_uuid

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    @raise errors.OpPrereqError: if a group with the new name exists

    """
    wanted_name = self.op.new_name

    # LookupNodeGroup raising OpPrereqError means the name is still free
    try:
      clashing_uuid = self.cfg.LookupNodeGroup(wanted_name)
      name_taken = True
    except errors.OpPrereqError:
      name_taken = False

    if name_taken:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (wanted_name, clashing_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: environment with the old and the new name of the group

    """
    hooks_env = {}
    hooks_env["OLD_NAME"] = self.op.group_name
    hooks_env["NEW_NAME"] = self.op.new_name
    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: a pair holding the same list twice; the list contains the
      master node followed by the other nodes of the group being renamed

    """
    master = self.cfg.GetMasterNode()

    # The master is listed first; dropping it here avoids listing it twice
    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(master, None)

    run_nodes = [master]
    for node in all_nodes.values():
      if node.group == self.group_uuid:
        run_nodes.append(node.name)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    @param feedback_fn: function used to send feedback back to the caller
    @return: the new name of the group
    @raise errors.OpExecError: if the group can't be retrieved

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    new_name = self.op.new_name
    group.name = new_name
    self.cfg.Update(group, feedback_fn)

    return new_name
12374
12375
class LUGroupEvacuate(LogicalUnit):
  """Evacuates all instances of a node group into other groups.

  The moves themselves are computed by an iallocator run in change-group
  mode; this LU only returns the resulting jobs.

  """
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve group names to UUIDs and prepare the lock declarations.

    """
    cfg = self.cfg

    # LookupNodeGroup raises errors.OpPrereqError itself for unknown groups
    self.group_uuid = cfg.LookupNodeGroup(self.op.group_name)

    requested = self.op.target_groups
    if not requested:
      self.req_target_uuids = []
    else:
      self.req_target_uuids = [cfg.LookupNodeGroup(name)
                               for name in requested]

    if self.group_uuid in self.req_target_uuids:
      msg = ("Group to be evacuated (%s) can not be used as a target group"
             " (targets are %s)" %
             (self.group_uuid, utils.CommaJoin(self.req_target_uuids)))
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    # Every level starts with its own empty list; DeclareLocks fills them in
    self.needed_locks = dict((level, [])
                             for level in (locking.LEVEL_INSTANCE,
                                           locking.LEVEL_NODEGROUP,
                                           locking.LEVEL_NODE))

  def DeclareLocks(self, level):
    """Fill in the locks needed at the given locking level.

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Optimistic locking: the instance list is read before node and group
      # locks are held, hence it must be verified again later on
      group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
      self.needed_locks[locking.LEVEL_INSTANCE] = group_instances
      return

    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if not self.req_target_uuids:
        # Without explicit targets all groups have to be locked
        wanted_groups = locking.ALL_SET
      else:
        wanted_groups = set(self.req_target_uuids)
        wanted_groups.add(self.group_uuid)

        # Also lock, optimistically, every group used by the instances; the
        # lookup goes via nodes which are not locked yet and therefore needs
        # verification later on
        for instance_name in self.owned_locks(locking.LEVEL_INSTANCE):
          wanted_groups.update(self.cfg.GetInstanceNodeGroups(instance_name))

      self.needed_locks[locking.LEVEL_NODEGROUP] = wanted_groups
      return

    if level == locking.LEVEL_NODE:
      # First the nodes which actually hold instances of the group to be
      # evacuated
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Then all members of the evacuated group and of the target groups
      held_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in held_groups

      group_members = []
      for group_uuid in held_groups:
        group_members.extend(self.cfg.GetNodeGroup(group_uuid).members)

      self.needed_locks[locking.LEVEL_NODE].extend(group_members)

  def CheckPrereq(self):
    """Verify the optimistically acquired locks and select target groups.

    """
    held_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    held_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    held_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert held_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in held_groups

    # The set of instances in the group must not have changed meanwhile
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, held_instances)

    self.instances = dict(self.cfg.GetMultiInstanceInfo(held_instances))

    # Neither must the node groups used by each locked instance
    for name in held_instances:
      instance = self.instances[name]
      assert held_nodes.issuperset(instance.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % name

      used_groups = _CheckInstanceNodeGroups(self.cfg, name, held_groups)

      assert self.group_uuid in used_groups, \
        "Instance %s has no node in group %s" % (name, self.group_uuid)

    if self.req_target_uuids:
      # Explicitly requested target groups take precedence
      self.target_uuids = self.req_target_uuids
      return

    # Otherwise every locked group but the evacuated one is a candidate
    self.target_uuids = [uuid for uuid in held_groups
                         if uuid != self.group_uuid]

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    The target groups are passed as a space-separated list of UUIDs.

    """
    env = {}
    env["GROUP_NAME"] = self.op.group_name
    env["TARGET_GROUPS"] = " ".join(self.target_uuids)
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Hooks run on the master node and on all members of the evacuated group,
    both before and after the operation.

    """
    master = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    hook_nodes = [master]
    hook_nodes = hook_nodes + self.cfg.GetNodeGroup(self.group_uuid).members

    return (hook_nodes, hook_nodes)

  def Exec(self, feedback_fn):
    """Ask the iallocator for an evacuation plan and return it as jobs.

    """
    instance_names = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    allocator = IAllocator(self.cfg, self.rpc,
                           constants.IALLOCATOR_MODE_CHG_GROUP,
                           instances=instance_names,
                           target_groups=self.target_uuids)

    allocator.Run(self.op.iallocator)

    if not allocator.success:
      msg = ("Can't compute group evacuation using iallocator '%s': %s" %
             (self.op.iallocator, allocator.info))
      raise errors.OpPrereqError(msg, errors.ECODE_NORES)

    evac_jobs = _LoadNodeEvacResult(self, allocator.result,
                                    self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(evac_jobs), self.op.group_name)

    return ResultWithJobs(evac_jobs)
12530
12531
class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Common base for the tag logical units.

  This class is abstract; the concrete tags LUs derive from it and share
  its name expansion and target lookup.

  """
  def ExpandNames(self):
    """Expand the object name and declare the lock for node/instance tags.

    """
    self.group_uuid = None
    self.needed_locks = {}

    kind = self.op.kind
    if kind == constants.TAG_NODE:
      node_name = _ExpandNodeName(self.cfg, self.op.name)
      self.op.name = node_name
      self.needed_locks[locking.LEVEL_NODE] = node_name
    elif kind == constants.TAG_INSTANCE:
      instance_name = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = instance_name
      self.needed_locks[locking.LEVEL_INSTANCE] = instance_name
    elif kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the tagged object for the requested kind and stores it in
    C{self.target}; an unknown kind is rejected.

    """
    cfg = self.cfg
    kind = self.op.kind

    if kind == constants.TAG_CLUSTER:
      target = cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      target = cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      target = cfg.GetInstanceInfo(self.op.name)
    elif kind == constants.TAG_NODEGROUP:
      target = cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(kind), errors.ECODE_INVAL)

    self.target = target
12568
12569
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names as in the base class, but with shared locks.

    """
    TagsLU.ExpandNames(self)

    # Reading tags does not modify anything, shared locks are sufficient
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    tags = self.target.GetTags()
    return list(tags)
12587
12588
12589 class LUTagsSearch(NoHooksLU):
12590   """Searches the tags for a given pattern.
12591
12592   """
12593   REQ_BGL = False
12594
12595   def ExpandNames(self):
12596     self.needed_locks = {}
12597
12598   def CheckPrereq(self):
12599     """Check prerequisites.
12600
12601     This checks the pattern passed for validity by compiling it.
12602
12603     """
12604     try:
12605       self.re = re.compile(self.op.pattern)
12606     except re.error, err:
12607       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12608                                  (self.op.pattern, err), errors.ECODE_INVAL)
12609
12610   def Exec(self, feedback_fn):
12611     """Returns the tag list.
12612
12613     """
12614     cfg = self.cfg
12615     tgts = [("/cluster", cfg.GetClusterInfo())]
12616     ilist = cfg.GetAllInstancesInfo().values()
12617     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12618     nlist = cfg.GetAllNodesInfo().values()
12619     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12620     tgts.extend(("/nodegroup/%s" % n.name, n)
12621                 for n in cfg.GetAllNodeGroupsInfo().values())
12622     results = []
12623     for path, target in tgts:
12624       for tag in target.GetTags():
12625         if self.re.search(tag):
12626           results.append((path, tag))
12627     return results
12628
12629
12630 class LUTagsSet(TagsLU):
12631   """Sets a tag on a given object.
12632
12633   """
12634   REQ_BGL = False
12635
12636   def CheckPrereq(self):
12637     """Check prerequisites.
12638
12639     This checks the type and length of the tag name and value.
12640
12641     """
12642     TagsLU.CheckPrereq(self)
12643     for tag in self.op.tags:
12644       objects.TaggableObject.ValidateTag(tag)
12645
12646   def Exec(self, feedback_fn):
12647     """Sets the tag.
12648
12649     """
12650     try:
12651       for tag in self.op.tags:
12652         self.target.AddTag(tag)
12653     except errors.TagError, err:
12654       raise errors.OpExecError("Error while setting tag: %s" % str(err))
12655     self.cfg.Update(self.target, feedback_fn)
12656
12657
class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    All tags must be valid and must currently be set on the target.

    """
    TagsLU.CheckPrereq(self)

    validate = objects.TaggableObject.ValidateTag
    for old_tag in self.op.tags:
      validate(old_tag)

    requested = frozenset(self.op.tags)
    present = self.target.GetTags()

    missing = requested - present
    if not missing:
      return

    quoted = ["'%s'" % name for name in sorted(missing)]
    raise errors.OpPrereqError("Tag(s) %s not found" %
                               (utils.CommaJoin(quoted), ),
                               errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tags from the object and write the configuration.

    """
    target = self.target
    for old_tag in self.op.tags:
      target.RemoveTag(old_tag)

    self.cfg.Update(target, feedback_fn)
12690
12691
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  The sleep takes place on the master, on a list of nodes, or on both,
  depending on the opcode parameters.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    Only the node list, if given, needs expanding and locking.

    """
    self.needed_locks = {}

    if not self.op.on_nodes:
      return

    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    wanted = _GetWantedNodes(self, self.op.on_nodes)
    self.op.on_nodes = wanted
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def _TestDelay(self):
    """Do the actual sleep.

    """
    duration = self.op.duration

    if self.op.on_master and not utils.TestDelay(duration):
      raise errors.OpExecError("Error during master delay test")

    if self.op.on_nodes:
      rpc_results = self.rpc.call_test_delay(self.op.on_nodes, duration)
      for (node_name, node_res) in rpc_results.items():
        node_res.Raise("Failure during rpc call to node %s" % node_name)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    A repeat count of zero means a single, unannounced delay.

    """
    repeat = self.op.repeat

    if repeat == 0:
      self._TestDelay()
      return

    last_idx = repeat - 1
    for idx in range(repeat):
      self.LogInfo("Test delay iteration %d/%d" % (idx, last_idx))
      self._TestDelay()
12738
12739
class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  At selected points of its life cycle the LU notifies a client through a
  temporary Unix socket and waits for the client to confirm before it
  continues.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  # Seconds to wait for the client to connect to the temporary socket
  _CLIENT_CONNECT_TIMEOUT = 20.0
  # Seconds to wait for the connected client to send its confirmation
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    The socket path is handed to C{cb}; afterwards the method waits for a
    connection and then for the client to send data or close the
    connection.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors
    @raise errcls: if the client does not connect or confirm in time

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        # The listening socket is not needed once accept() returned or failed
        sock.close()
    finally:
      # Remove as soon as client is connected (this also runs if any of the
      # steps above failed)
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        # The received data is discarded, only the fact that recv returned
        # matters
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    The notification is emitted as a log entry of type
    C{constants.ELOG_JQUEUE_TEST}.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    # Failures are reported with the exception class matching the phase
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    # The socket path is the only argument left for _NotifyUsingSocket to fill
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    """Count the calls to this method and reset the ExpandNames counter.

    """
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    """Verify the call order, optionally notify the client and set locks.

    @raise errors.ProgrammerError: if CheckArguments was not called before

    """
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    """Run the requested notifications and log messages, then succeed or fail.

    @raise errors.ProgrammerError: if ExpandNames was not called before
    @raise errors.OpExecError: if the opcode requested a failure
    @rtype: bool
    @return: True, unless a failure was requested

    """
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      # Announce the number of messages before sending them one by one
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True
12874
12875
12876 class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all keys listed for the mode in the _MODE_DATA class
      attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
12889   # pylint: disable=R0902
12890   # lots of instance attributes
12891
12892   def __init__(self, cfg, rpc, mode, **kwargs):
12893     self.cfg = cfg
12894     self.rpc = rpc
12895     # init buffer variables
12896     self.in_text = self.out_text = self.in_data = self.out_data = None
12897     # init all input fields so that pylint is happy
12898     self.mode = mode
12899     self.memory = self.disks = self.disk_template = None
12900     self.os = self.tags = self.nics = self.vcpus = None
12901     self.hypervisor = None
12902     self.relocate_from = None
12903     self.name = None
12904     self.instances = None
12905     self.evac_mode = None
12906     self.target_groups = []
12907     # computed fields
12908     self.required_nodes = None
12909     # init result fields
12910     self.success = self.info = self.result = None
12911
12912     try:
12913       (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12914     except KeyError:
12915       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12916                                    " IAllocator" % self.mode)
12917
12918     keyset = [n for (n, _) in keydata]
12919
12920     for key in kwargs:
12921       if key not in keyset:
12922         raise errors.ProgrammerError("Invalid input parameter '%s' to"
12923                                      " IAllocator" % key)
12924       setattr(self, key, kwargs[key])
12925
12926     for key in keyset:
12927       if key not in kwargs:
12928         raise errors.ProgrammerError("Missing input parameter '%s' to"
12929                                      " IAllocator" % key)
12930     self._BuildInputData(compat.partial(fn, self), keydata)
12931
12932   def _ComputeClusterData(self):
12933     """Compute the generic allocator input data.
12934
12935     This is the data that is independent of the actual operation.
12936
12937     """
12938     cfg = self.cfg
12939     cluster_info = cfg.GetClusterInfo()
12940     # cluster data
12941     data = {
12942       "version": constants.IALLOCATOR_VERSION,
12943       "cluster_name": cfg.GetClusterName(),
12944       "cluster_tags": list(cluster_info.GetTags()),
12945       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12946       # we don't have job IDs
12947       }
12948     ninfo = cfg.GetAllNodesInfo()
12949     iinfo = cfg.GetAllInstancesInfo().values()
12950     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12951
12952     # node data
12953     node_list = [n.name for n in ninfo.values() if n.vm_capable]
12954
12955     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12956       hypervisor_name = self.hypervisor
12957     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12958       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12959     else:
12960       hypervisor_name = cluster_info.enabled_hypervisors[0]
12961
12962     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12963                                         hypervisor_name)
12964     node_iinfo = \
12965       self.rpc.call_all_instances_info(node_list,
12966                                        cluster_info.enabled_hypervisors)
12967
12968     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12969
12970     config_ndata = self._ComputeBasicNodeData(ninfo)
12971     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12972                                                  i_list, config_ndata)
12973     assert len(data["nodes"]) == len(ninfo), \
12974         "Incomplete node data computed"
12975
12976     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12977
12978     self.in_data = data
12979
12980   @staticmethod
12981   def _ComputeNodeGroupData(cfg):
12982     """Compute node groups data.
12983
12984     """
12985     ng = dict((guuid, {
12986       "name": gdata.name,
12987       "alloc_policy": gdata.alloc_policy,
12988       })
12989       for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12990
12991     return ng
12992
12993   @staticmethod
12994   def _ComputeBasicNodeData(node_cfg):
12995     """Compute global node data.
12996
12997     @rtype: dict
12998     @returns: a dict of name: (node dict, node config)
12999
13000     """
13001     # fill in static (config-based) values
13002     node_results = dict((ninfo.name, {
13003       "tags": list(ninfo.GetTags()),
13004       "primary_ip": ninfo.primary_ip,
13005       "secondary_ip": ninfo.secondary_ip,
13006       "offline": ninfo.offline,
13007       "drained": ninfo.drained,
13008       "master_candidate": ninfo.master_candidate,
13009       "group": ninfo.group,
13010       "master_capable": ninfo.master_capable,
13011       "vm_capable": ninfo.vm_capable,
13012       })
13013       for ninfo in node_cfg.values())
13014
13015     return node_results
13016
13017   @staticmethod
13018   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13019                               node_results):
13020     """Compute global node data.
13021
13022     @param node_results: the basic node structures as filled from the config
13023
13024     """
13025     # make a copy of the current dict
13026     node_results = dict(node_results)
13027     for nname, nresult in node_data.items():
13028       assert nname in node_results, "Missing basic data for node %s" % nname
13029       ninfo = node_cfg[nname]
13030
13031       if not (ninfo.offline or ninfo.drained):
13032         nresult.Raise("Can't get data for node %s" % nname)
13033         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13034                                 nname)
13035         remote_info = nresult.payload
13036
13037         for attr in ["memory_total", "memory_free", "memory_dom0",
13038                      "vg_size", "vg_free", "cpu_total"]:
13039           if attr not in remote_info:
13040             raise errors.OpExecError("Node '%s' didn't return attribute"
13041                                      " '%s'" % (nname, attr))
13042           if not isinstance(remote_info[attr], int):
13043             raise errors.OpExecError("Node '%s' returned invalid value"
13044                                      " for '%s': %s" %
13045                                      (nname, attr, remote_info[attr]))
13046         # compute memory used by primary instances
13047         i_p_mem = i_p_up_mem = 0
13048         for iinfo, beinfo in i_list:
13049           if iinfo.primary_node == nname:
13050             i_p_mem += beinfo[constants.BE_MEMORY]
13051             if iinfo.name not in node_iinfo[nname].payload:
13052               i_used_mem = 0
13053             else:
13054               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13055             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13056             remote_info["memory_free"] -= max(0, i_mem_diff)
13057
13058             if iinfo.admin_up:
13059               i_p_up_mem += beinfo[constants.BE_MEMORY]
13060
13061         # compute memory used by instances
13062         pnr_dyn = {
13063           "total_memory": remote_info["memory_total"],
13064           "reserved_memory": remote_info["memory_dom0"],
13065           "free_memory": remote_info["memory_free"],
13066           "total_disk": remote_info["vg_size"],
13067           "free_disk": remote_info["vg_free"],
13068           "total_cpus": remote_info["cpu_total"],
13069           "i_pri_memory": i_p_mem,
13070           "i_pri_up_memory": i_p_up_mem,
13071           }
13072         pnr_dyn.update(node_results[nname])
13073         node_results[nname] = pnr_dyn
13074
13075     return node_results
13076
13077   @staticmethod
13078   def _ComputeInstanceData(cluster_info, i_list):
13079     """Compute global instance data.
13080
13081     """
13082     instance_data = {}
13083     for iinfo, beinfo in i_list:
13084       nic_data = []
13085       for nic in iinfo.nics:
13086         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13087         nic_dict = {
13088           "mac": nic.mac,
13089           "ip": nic.ip,
13090           "mode": filled_params[constants.NIC_MODE],
13091           "link": filled_params[constants.NIC_LINK],
13092           }
13093         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13094           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13095         nic_data.append(nic_dict)
13096       pir = {
13097         "tags": list(iinfo.GetTags()),
13098         "admin_up": iinfo.admin_up,
13099         "vcpus": beinfo[constants.BE_VCPUS],
13100         "memory": beinfo[constants.BE_MEMORY],
13101         "os": iinfo.os,
13102         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13103         "nics": nic_data,
13104         "disks": [{constants.IDISK_SIZE: dsk.size,
13105                    constants.IDISK_MODE: dsk.mode}
13106                   for dsk in iinfo.disks],
13107         "disk_template": iinfo.disk_template,
13108         "hypervisor": iinfo.hypervisor,
13109         }
13110       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13111                                                  pir["disks"])
13112       instance_data[iinfo.name] = pir
13113
13114     return instance_data
13115
13116   def _AddNewInstance(self):
13117     """Add new instance data to allocator structure.
13118
13119     This in combination with _AllocatorGetClusterData will create the
13120     correct structure needed as input for the allocator.
13121
13122     The checks for the completeness of the opcode must have already been
13123     done.
13124
13125     """
13126     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13127
13128     if self.disk_template in constants.DTS_INT_MIRROR:
13129       self.required_nodes = 2
13130     else:
13131       self.required_nodes = 1
13132
13133     request = {
13134       "name": self.name,
13135       "disk_template": self.disk_template,
13136       "tags": self.tags,
13137       "os": self.os,
13138       "vcpus": self.vcpus,
13139       "memory": self.memory,
13140       "disks": self.disks,
13141       "disk_space_total": disk_space,
13142       "nics": self.nics,
13143       "required_nodes": self.required_nodes,
13144       "hypervisor": self.hypervisor,
13145       }
13146
13147     return request
13148
13149   def _AddRelocateInstance(self):
13150     """Add relocate instance data to allocator structure.
13151
13152     This in combination with _IAllocatorGetClusterData will create the
13153     correct structure needed as input for the allocator.
13154
13155     The checks for the completeness of the opcode must have already been
13156     done.
13157
13158     """
13159     instance = self.cfg.GetInstanceInfo(self.name)
13160     if instance is None:
13161       raise errors.ProgrammerError("Unknown instance '%s' passed to"
13162                                    " IAllocator" % self.name)
13163
13164     if instance.disk_template not in constants.DTS_MIRRORED:
13165       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13166                                  errors.ECODE_INVAL)
13167
13168     if instance.disk_template in constants.DTS_INT_MIRROR and \
13169         len(instance.secondary_nodes) != 1:
13170       raise errors.OpPrereqError("Instance has not exactly one secondary node",
13171                                  errors.ECODE_STATE)
13172
13173     self.required_nodes = 1
13174     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13175     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13176
13177     request = {
13178       "name": self.name,
13179       "disk_space_total": disk_space,
13180       "required_nodes": self.required_nodes,
13181       "relocate_from": self.relocate_from,
13182       }
13183     return request
13184
13185   def _AddNodeEvacuate(self):
13186     """Get data for node-evacuate requests.
13187
13188     """
13189     return {
13190       "instances": self.instances,
13191       "evac_mode": self.evac_mode,
13192       }
13193
13194   def _AddChangeGroup(self):
13195     """Get data for node-evacuate requests.
13196
13197     """
13198     return {
13199       "instances": self.instances,
13200       "target_groups": self.target_groups,
13201       }
13202
13203   def _BuildInputData(self, fn, keydata):
13204     """Build input data structures.
13205
13206     """
13207     self._ComputeClusterData()
13208
13209     request = fn()
13210     request["type"] = self.mode
13211     for keyname, keytype in keydata:
13212       if keyname not in request:
13213         raise errors.ProgrammerError("Request parameter %s is missing" %
13214                                      keyname)
13215       val = request[keyname]
13216       if not keytype(val):
13217         raise errors.ProgrammerError("Request parameter %s doesn't pass"
13218                                      " validation, value %s, expected"
13219                                      " type %s" % (keyname, val, keytype))
13220     self.in_data["request"] = request
13221
13222     self.in_text = serializer.Dump(self.in_data)
13223
  # Check function for a list of strings
  _STRING_LIST = ht.TListOf(ht.TString)
  # Check function for a list of jobs, each job being a list of dicts whose
  # "OP_ID" entry must name a failover, migrate or replace-disks opcode
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  # Check function for the "moved" part of an evacuation result: a list of
  # three-element items (non-empty string, non-empty string, list of
  # non-empty strings)
  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  # Check function for the "failed" part of an evacuation result: a list of
  # two-element items (non-empty string, optional string)
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  # Check function for a complete evacuation result: moved, failed, jobs
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  # Per-mode definitions, looked up in __init__. Each value is a tuple of
  # (function building the request, list of (input key, check function)
  # tuples, check function for the result)
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
     constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
     constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
13276
13277   def Run(self, name, validate=True, call_fn=None):
13278     """Run an instance allocator and return the results.
13279
13280     """
13281     if call_fn is None:
13282       call_fn = self.rpc.call_iallocator_runner
13283
13284     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13285     result.Raise("Failure while running the iallocator script")
13286
13287     self.out_text = result.payload
13288     if validate:
13289       self._ValidateResult()
13290
13291   def _ValidateResult(self):
13292     """Process the allocator results.
13293
13294     This will process and if successful save the result in
13295     self.out_data and the other parameters.
13296
13297     """
13298     try:
13299       rdict = serializer.Load(self.out_text)
13300     except Exception, err:
13301       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13302
13303     if not isinstance(rdict, dict):
13304       raise errors.OpExecError("Can't parse iallocator results: not a dict")
13305
13306     # TODO: remove backwards compatiblity in later versions
13307     if "nodes" in rdict and "result" not in rdict:
13308       rdict["result"] = rdict["nodes"]
13309       del rdict["nodes"]
13310
13311     for key in "success", "info", "result":
13312       if key not in rdict:
13313         raise errors.OpExecError("Can't parse iallocator results:"
13314                                  " missing key '%s'" % key)
13315       setattr(self, key, rdict[key])
13316
13317     if not self._result_check(self.result):
13318       raise errors.OpExecError("Iallocator returned invalid result,"
13319                                " expected %s, got %s" %
13320                                (self._result_check, self.result),
13321                                errors.ECODE_INVAL)
13322
13323     if self.mode == constants.IALLOCATOR_MODE_RELOC:
13324       assert self.relocate_from is not None
13325       assert self.required_nodes == 1
13326
13327       node2group = dict((name, ndata["group"])
13328                         for (name, ndata) in self.in_data["nodes"].items())
13329
13330       fn = compat.partial(self._NodesToGroups, node2group,
13331                           self.in_data["nodegroups"])
13332
13333       instance = self.cfg.GetInstanceInfo(self.name)
13334       request_groups = fn(self.relocate_from + [instance.primary_node])
13335       result_groups = fn(rdict["result"] + [instance.primary_node])
13336
13337       if self.success and not set(result_groups).issubset(request_groups):
13338         raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13339                                  " differ from original groups (%s)" %
13340                                  (utils.CommaJoin(result_groups),
13341                                   utils.CommaJoin(request_groups)))
13342
13343     elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13344       assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13345
13346     self.out_data = rdict
13347
13348   @staticmethod
13349   def _NodesToGroups(node2group, groups, nodes):
13350     """Returns a list of unique group names for a list of nodes.
13351
13352     @type node2group: dict
13353     @param node2group: Map from node name to group UUID
13354     @type groups: dict
13355     @param groups: Group information
13356     @type nodes: list
13357     @param nodes: Node names
13358
13359     """
13360     result = set()
13361
13362     for node in nodes:
13363       try:
13364         group_uuid = node2group[node]
13365       except KeyError:
13366         # Ignore unknown node
13367         pass
13368       else:
13369         try:
13370           group = groups[group_uuid]
13371         except KeyError:
13372           # Can't find group, let's use UUID
13373           group_name = group_uuid
13374         else:
13375           group_name = group["name"]
13376
13377         result.add(group_name)
13378
13379     return sorted(result)
13380
13381
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU builds an iallocator request from the opcode parameters and,
  depending on the requested direction, either returns the generated
  input text or runs the named allocator and returns its raw,
  unvalidated output.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    @raise errors.OpPrereqError: if a parameter required by the mode is
        missing or malformed, if the mode or direction is unknown, or if
        no allocator name is given for the "out" direction

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # An allocation test is only accepted for a name not yet in use
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      # Every disk must be a dict with an integer size and a valid mode
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      # Fall back to the hypervisor type reported by the configuration
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      # The test relocates away from the instance's secondary nodes
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      # Running the allocator requires knowing which one to run
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @param feedback_fn: feedback function, not used by this LU
    @return: the allocator input text if the direction is
        L{constants.IALLOCATOR_DIR_IN}, otherwise the unvalidated output
        text of the allocator named by the opcode
    @raise errors.ProgrammerError: if the mode is not handled here

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      # The mode must be interpolated with "%"; passing it as a second
      # exception argument would leave a literal "%s" in the message
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      # The output is returned as-is so that it can be inspected even if
      # it would not pass validation
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
13485
13486
#: Maps every query resource type to its implementation
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

# The table must cover exactly the resources listed in QR_VIA_OP
assert set(_QUERY_IMPL) == constants.QR_VIA_OP
13496
13497
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}
  @return: the entry registered for C{name} in L{_QUERY_IMPL}
  @raise errors.OpPrereqError: if no implementation is registered for
      C{name}

  """
  if name not in _QUERY_IMPL:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)

  return _QUERY_IMPL[name]