4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have way too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
63 import ganeti.masterd.instance # pylint: disable=W0611
67 """Data container for LU results with jobs.
69 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
71 contained in the C{jobs} attribute and include the job IDs in the opcode
75 def __init__(self, jobs, **kwargs):
76 """Initializes this class.
78 Additional return values can be specified as keyword arguments.
80   @type jobs: list of lists of L{opcodes.OpCode}
81 @param jobs: A list of lists of opcode objects
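  # Illustrative sketch, not part of the original module: an LU whose work
  # continues in separate jobs can return something like
  #
  #   return ResultWithJobs([[opcodes.OpClusterVerifyGroup(group_name=g)]
  #                          for g in group_names],
  #                         submitted_groups=group_names)
  #
  # mcpu.Processor._ProcessResult() then submits each inner list as one job
  # and includes the resulting job IDs in the opcode result; extra keyword
  # arguments ("submitted_groups" here is hypothetical) are returned
  # alongside them.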
88 class LogicalUnit(object):
89 """Logical Unit base class.
91 Subclasses must follow these rules:
92 - implement ExpandNames
93 - implement CheckPrereq (except when tasklets are used)
94 - implement Exec (except when tasklets are used)
95 - implement BuildHooksEnv
96 - implement BuildHooksNodes
97 - redefine HPATH and HTYPE
98 - optionally redefine their run requirements:
99 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
101 Note that all commands require root permissions.
103 @ivar dry_run_result: the value (if any) that will be returned to the caller
104 in dry-run mode (signalled by opcode dry_run parameter)
111 def __init__(self, processor, op, context, rpc):
112 """Constructor for LogicalUnit.
114 This needs to be overridden in derived classes in order to check op
118 self.proc = processor
120 self.cfg = context.cfg
121 self.glm = context.glm
123 self.owned_locks = context.glm.list_owned
124 self.context = context
126 # Dicts used to declare locking needs to mcpu
127 self.needed_locks = None
128 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
130 self.remove_locks = {}
131 # Used to force good behavior when calling helper functions
132 self.recalculate_locks = {}
134 self.Log = processor.Log # pylint: disable=C0103
135 self.LogWarning = processor.LogWarning # pylint: disable=C0103
136 self.LogInfo = processor.LogInfo # pylint: disable=C0103
137 self.LogStep = processor.LogStep # pylint: disable=C0103
138 # support for dry-run
139 self.dry_run_result = None
140 # support for generic debug attribute
141 if (not hasattr(self.op, "debug_level") or
142 not isinstance(self.op.debug_level, int)):
143 self.op.debug_level = 0
148 # Validate opcode parameters and set defaults
149 self.op.Validate(True)
151 self.CheckArguments()
153 def CheckArguments(self):
154 """Check syntactic validity for the opcode arguments.
156     This method is for doing a simple syntactic check, ensuring the
157     validity of opcode parameters, without any cluster-related
158 checks. While the same can be accomplished in ExpandNames and/or
159     CheckPrereq, doing these separately is better because:
161       - ExpandNames is left as purely a lock-related function
162 - CheckPrereq is run after we have acquired locks (and possible
165 The function is allowed to change the self.op attribute so that
166     later methods need not worry about missing parameters.
171 def ExpandNames(self):
172 """Expand names for this LU.
174 This method is called before starting to execute the opcode, and it should
175 update all the parameters of the opcode to their canonical form (e.g. a
176 short node name must be fully expanded after this method has successfully
177 completed). This way locking, hooks, logging, etc. can work correctly.
179 LUs which implement this method must also populate the self.needed_locks
180 member, as a dict with lock levels as keys, and a list of needed lock names
183 - use an empty dict if you don't need any lock
184       - if you don't need any lock at a particular level, omit that level
185 - don't put anything for the BGL level
186 - if you want all locks at a level use locking.ALL_SET as a value
188 If you need to share locks (rather than acquire them exclusively) at one
189 level you can modify self.share_locks, setting a true value (usually 1) for
190 that level. By default locks are not shared.
192 This function can also define a list of tasklets, which then will be
193 executed in order instead of the usual LU-level CheckPrereq and Exec
194 functions, if those are not defined by the LU.
198 # Acquire all nodes and one instance
199 self.needed_locks = {
200 locking.LEVEL_NODE: locking.ALL_SET,
201 locking.LEVEL_INSTANCE: ['instance1.example.com'],
203 # Acquire just two nodes
204 self.needed_locks = {
205 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
208 self.needed_locks = {} # No, you can't leave it to the default value None
211 # The implementation of this method is mandatory only if the new LU is
212 # concurrent, so that old LUs don't need to be changed all at the same
215 self.needed_locks = {} # Exclusive LUs don't need locks.
217 raise NotImplementedError
219 def DeclareLocks(self, level):
220 """Declare LU locking needs for a level
222 While most LUs can just declare their locking needs at ExpandNames time,
223 sometimes there's the need to calculate some locks after having acquired
224 the ones before. This function is called just before acquiring locks at a
225 particular level, but after acquiring the ones at lower levels, and permits
226 such calculations. It can be used to modify self.needed_locks, and by
227 default it does nothing.
229 This function is only called if you have something already set in
230 self.needed_locks for the level.
232 @param level: Locking level which is going to be locked
233 @type level: member of ganeti.locking.LEVELS
237 def CheckPrereq(self):
238 """Check prerequisites for this LU.
240 This method should check that the prerequisites for the execution
241 of this LU are fulfilled. It can do internode communication, but
242 it should be idempotent - no cluster or system changes are
245 The method should raise errors.OpPrereqError in case something is
246 not fulfilled. Its return value is ignored.
248 This method should also update all the parameters of the opcode to
249 their canonical form if it hasn't been done by ExpandNames before.
252 if self.tasklets is not None:
253 for (idx, tl) in enumerate(self.tasklets):
254 logging.debug("Checking prerequisites for tasklet %s/%s",
255 idx + 1, len(self.tasklets))
260 def Exec(self, feedback_fn):
263 This method should implement the actual work. It should raise
264 errors.OpExecError for failures that are somewhat dealt with in
268 if self.tasklets is not None:
269 for (idx, tl) in enumerate(self.tasklets):
270 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
273 raise NotImplementedError
275 def BuildHooksEnv(self):
276 """Build hooks environment for this LU.
279 @return: Dictionary containing the environment that will be used for
280 running the hooks for this LU. The keys of the dict must not be prefixed
281 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
282 will extend the environment with additional variables. If no environment
283 should be defined, an empty dictionary should be returned (not C{None}).
284 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
288 raise NotImplementedError
290 def BuildHooksNodes(self):
291 """Build list of nodes to run LU's hooks.
293 @rtype: tuple; (list, list)
294 @return: Tuple containing a list of node names on which the hook
295 should run before the execution and a list of node names on which the
296       hook should run after the execution. If there are no nodes, an empty
297       list should be returned (and not C{None}).
298 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
302 raise NotImplementedError
304 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
305 """Notify the LU about the results of its hooks.
307 This method is called every time a hooks phase is executed, and notifies
308 the Logical Unit about the hooks' result. The LU can then use it to alter
309 its result based on the hooks. By default the method does nothing and the
310     previous result is passed back unchanged, but any LU can override it if it
311 wants to use the local cluster hook-scripts somehow.
313 @param phase: one of L{constants.HOOKS_PHASE_POST} or
314 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
315 @param hook_results: the results of the multi-node hooks rpc call
316     @param feedback_fn: function used to send feedback back to the caller
317 @param lu_result: the previous Exec result this LU had, or None
319 @return: the new Exec result, based on the previous result
323     # API must be kept, thus we ignore the unused argument and the
324     # 'could be a function' warnings
325 # pylint: disable=W0613,R0201
328 def _ExpandAndLockInstance(self):
329 """Helper function to expand and lock an instance.
331 Many LUs that work on an instance take its name in self.op.instance_name
332 and need to expand it and then declare the expanded name for locking. This
333 function does it, and then updates self.op.instance_name to the expanded
334 name. It also initializes needed_locks as a dict, if this hasn't been done
338 if self.needed_locks is None:
339 self.needed_locks = {}
341 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
342 "_ExpandAndLockInstance called with instance-level locks set"
343 self.op.instance_name = _ExpandInstanceName(self.cfg,
344 self.op.instance_name)
345 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
347 def _LockInstancesNodes(self, primary_only=False):
348 """Helper function to declare instances' nodes for locking.
350 This function should be called after locking one or more instances to lock
351 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
352 with all primary or secondary nodes for instances already locked and
353 present in self.needed_locks[locking.LEVEL_INSTANCE].
355 It should be called from DeclareLocks, and for safety only works if
356 self.recalculate_locks[locking.LEVEL_NODE] is set.
358 In the future it may grow parameters to just lock some instance's nodes, or
359     to just lock primary or secondary nodes, if needed.
361     It should be called from DeclareLocks in a way similar to::
363 if level == locking.LEVEL_NODE:
364 self._LockInstancesNodes()
366 @type primary_only: boolean
367 @param primary_only: only lock primary nodes of locked instances
370 assert locking.LEVEL_NODE in self.recalculate_locks, \
371 "_LockInstancesNodes helper function called with no nodes to recalculate"
373     # TODO: check if we've really been called with the instance locks held
375 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
376 # future we might want to have different behaviors depending on the value
377 # of self.recalculate_locks[locking.LEVEL_NODE]
379 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
380 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
381 wanted_nodes.append(instance.primary_node)
383 wanted_nodes.extend(instance.secondary_nodes)
385 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
386 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
387 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
388 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
390 del self.recalculate_locks[locking.LEVEL_NODE]
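# Illustrative sketch, not part of the original module: the usual pattern
# for an instance-level LU that also needs its instance's node locks.  The
# class name and hook path are hypothetical, and self.instance is assumed
# to have been stored by CheckPrereq.
class _ExampleInstanceLU(LogicalUnit):
  HPATH = "instance-example"
  HTYPE = constants.HTYPE_INSTANCE

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # Node locks are computed later, once the instance lock is held
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)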
393 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
394 """Simple LU which runs no hooks.
396 This LU is intended as a parent for other LogicalUnits which will
397 run no hooks, in order to reduce duplicate code.
403 def BuildHooksEnv(self):
404 """Empty BuildHooksEnv for NoHooksLu.
406 This just raises an error.
409 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
411 def BuildHooksNodes(self):
412 """Empty BuildHooksNodes for NoHooksLU.
415 raise AssertionError("BuildHooksNodes called for NoHooksLU")
419 """Tasklet base class.
421 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
422 they can mix legacy code with tasklets. Locking needs to be done in the LU,
423 tasklets know nothing about locks.
425 Subclasses must follow these rules:
426 - Implement CheckPrereq
430 def __init__(self, lu):
437 def CheckPrereq(self):
438 """Check prerequisites for this tasklets.
440 This method should check whether the prerequisites for the execution of
441 this tasklet are fulfilled. It can do internode communication, but it
442 should be idempotent - no cluster or system changes are allowed.
444 The method should raise errors.OpPrereqError in case something is not
445 fulfilled. Its return value is ignored.
447 This method should also update all parameters to their canonical form if it
448 hasn't been done before.
453 def Exec(self, feedback_fn):
454 """Execute the tasklet.
456 This method should implement the actual work. It should raise
457 errors.OpExecError for failures that are somewhat dealt with in code, or
461 raise NotImplementedError
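# Illustrative sketch, not part of the original module: a minimal tasklet.
# An LU would create it in ExpandNames, e.g.
# "self.tasklets = [_ExampleNoopTasklet(self)]", and the LU-level
# CheckPrereq/Exec above then iterate over it automatically.
class _ExampleNoopTasklet(Tasklet):
  """Tasklet sketch that verifies nothing and changes nothing."""
  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    feedback_fn("Nothing to do")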
465 """Base for query utility classes.
468 #: Attribute holding field definitions
471 def __init__(self, filter_, fields, use_locking):
472 """Initializes this class.
475 self.use_locking = use_locking
477 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
479 self.requested_data = self.query.RequestedData()
480 self.names = self.query.RequestedNames()
482 # Sort only if no names were requested
483 self.sort_by_name = not self.names
485 self.do_locking = None
488 def _GetNames(self, lu, all_names, lock_level):
489 """Helper function to determine names asked for in the query.
493 names = lu.owned_locks(lock_level)
497 if self.wanted == locking.ALL_SET:
498 assert not self.names
499 # caller didn't specify names, so ordering is not important
500 return utils.NiceSort(names)
502 # caller specified names and we must keep the same order
504 assert not self.do_locking or lu.glm.is_owned(lock_level)
506 missing = set(self.wanted).difference(names)
508 raise errors.OpExecError("Some items were removed before retrieving"
509 " their data: %s" % missing)
511 # Return expanded names
514 def ExpandNames(self, lu):
515 """Expand names for this query.
517 See L{LogicalUnit.ExpandNames}.
520 raise NotImplementedError()
522 def DeclareLocks(self, lu, level):
523 """Declare locks for this query.
525 See L{LogicalUnit.DeclareLocks}.
528 raise NotImplementedError()
530 def _GetQueryData(self, lu):
531 """Collects all data for this query.
533 @return: Query data object
536 raise NotImplementedError()
538 def NewStyleQuery(self, lu):
539 """Collect data and execute query.
542 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
543 sort_by_name=self.sort_by_name)
545 def OldStyleQuery(self, lu):
546 """Collect data and execute query.
549 return self.query.OldStyleQuery(self._GetQueryData(lu),
550 sort_by_name=self.sort_by_name)
554 """Returns a dict declaring all lock levels shared.
557 return dict.fromkeys(locking.LEVELS, 1)
560 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
561 """Checks if the owned node groups are still correct for an instance.
563 @type cfg: L{config.ConfigWriter}
564 @param cfg: The cluster configuration
565 @type instance_name: string
566 @param instance_name: Instance name
567 @type owned_groups: set or frozenset
568 @param owned_groups: List of currently owned node groups
571 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
573 if not owned_groups.issuperset(inst_groups):
574 raise errors.OpPrereqError("Instance %s's node groups changed since"
575 " locks were acquired, current groups are"
576 " are '%s', owning groups '%s'; retry the"
579 utils.CommaJoin(inst_groups),
580 utils.CommaJoin(owned_groups)),
586 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
587 """Checks if the instances in a node group are still correct.
589 @type cfg: L{config.ConfigWriter}
590 @param cfg: The cluster configuration
591 @type group_uuid: string
592 @param group_uuid: Node group UUID
593 @type owned_instances: set or frozenset
594 @param owned_instances: List of currently owned instances
597 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
598 if owned_instances != wanted_instances:
599 raise errors.OpPrereqError("Instances in node group '%s' changed since"
600 " locks were acquired, wanted '%s', have '%s';"
601 " retry the operation" %
603 utils.CommaJoin(wanted_instances),
604 utils.CommaJoin(owned_instances)),
607 return wanted_instances
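# Illustrative sketch, not part of the original module: a group-level LU
# would typically re-run this check from CheckPrereq after acquiring its
# locks, for example via a helper like the following (name hypothetical).
def _ExampleRecheckGroupInstances(lu, group_uuid):
  """Sketch: re-validate that the owned instance locks still match."""
  owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
  return _CheckNodeGroupInstances(lu.cfg, group_uuid, owned_instances)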
610 def _SupportsOob(cfg, node):
611 """Tells if node supports OOB.
613 @type cfg: L{config.ConfigWriter}
614 @param cfg: The cluster configuration
615 @type node: L{objects.Node}
616 @param node: The node
617 @return: The OOB script if supported or an empty string otherwise
620 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
623 def _GetWantedNodes(lu, nodes):
624 """Returns list of checked and expanded node names.
626 @type lu: L{LogicalUnit}
627 @param lu: the logical unit on whose behalf we execute
629 @param nodes: list of node names or None for all nodes
631 @return: the list of nodes, sorted
632   @raise errors.ProgrammerError: if the nodes parameter is of the wrong type
636 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
638 return utils.NiceSort(lu.cfg.GetNodeList())
641 def _GetWantedInstances(lu, instances):
642 """Returns list of checked and expanded instance names.
644 @type lu: L{LogicalUnit}
645 @param lu: the logical unit on whose behalf we execute
646 @type instances: list
647 @param instances: list of instance names or None for all instances
649 @return: the list of instances, sorted
650   @raise errors.OpPrereqError: if the instances parameter is of the wrong type
651 @raise errors.OpPrereqError: if any of the passed instances is not found
655 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
657 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
661 def _GetUpdatedParams(old_params, update_dict,
662 use_default=True, use_none=False):
663 """Return the new version of a parameter dictionary.
665 @type old_params: dict
666 @param old_params: old parameters
667 @type update_dict: dict
668 @param update_dict: dict containing new parameter values, or
669 constants.VALUE_DEFAULT to reset the parameter to its default
671   @type use_default: boolean
672   @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
673       values as 'to be deleted' values
674   @type use_none: boolean
675   @param use_none: whether to recognise C{None} values as 'to be
678 @return: the new parameter dictionary
681 params_copy = copy.deepcopy(old_params)
682 for key, val in update_dict.iteritems():
683 if ((use_default and val == constants.VALUE_DEFAULT) or
684 (use_none and val is None)):
690 params_copy[key] = val
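# Illustrative sketch, not part of the original module: how the
# VALUE_DEFAULT marker resets a key while ordinary values override it.
# The parameter names below are hypothetical.
def _ExampleUpdatedParams():
  """Sketch: returns {"root_path": "/dev/sda1"}."""
  old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/vda1"}
  update = {"kernel_path": constants.VALUE_DEFAULT,
            "root_path": "/dev/sda1"}
  # "kernel_path" is dropped, so the cluster-level default applies again;
  # "root_path" simply takes the new value.
  return _GetUpdatedParams(old, update)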
694 def _ReleaseLocks(lu, level, names=None, keep=None):
695 """Releases locks owned by an LU.
697 @type lu: L{LogicalUnit}
698 @param level: Lock level
699 @type names: list or None
700 @param names: Names of locks to release
701 @type keep: list or None
702 @param keep: Names of locks to retain
705 assert not (keep is not None and names is not None), \
706 "Only one of the 'names' and the 'keep' parameters can be given"
708 if names is not None:
709 should_release = names.__contains__
711 should_release = lambda name: name not in keep
713 should_release = None
719 # Determine which locks to release
720 for name in lu.owned_locks(level):
721 if should_release(name):
726 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
728 # Release just some locks
729 lu.glm.release(level, names=release)
731 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
734 lu.glm.release(level)
736 assert not lu.glm.is_owned(level), "No locks should be owned"
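# Illustrative sketch, not part of the original module: once only the
# primary node is still interesting, an LU can drop the other node locks.
# The helper name is hypothetical.
def _ExampleKeepOnlyPrimaryLock(lu, instance):
  """Sketch: retain the primary node's lock and release all others."""
  _ReleaseLocks(lu, locking.LEVEL_NODE, keep=[instance.primary_node])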
739 def _MapInstanceDisksToNodes(instances):
740 """Creates a map from (node, volume) to instance name.
742 @type instances: list of L{objects.Instance}
743 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
746 return dict(((node, vol), inst.name)
747 for inst in instances
748 for (node, vols) in inst.MapLVsByNode().items()
752 def _RunPostHook(lu, node_name):
753 """Runs the post-hook for an opcode on a single node.
756 hm = lu.proc.BuildHooksManager(lu)
758 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
760 # pylint: disable=W0702
761 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
764 def _CheckOutputFields(static, dynamic, selected):
765 """Checks whether all selected fields are valid.
767 @type static: L{utils.FieldSet}
768 @param static: static fields set
769 @type dynamic: L{utils.FieldSet}
770 @param dynamic: dynamic fields set
777 delta = f.NonMatching(selected)
779 raise errors.OpPrereqError("Unknown output fields selected: %s"
780 % ",".join(delta), errors.ECODE_INVAL)
783 def _CheckGlobalHvParams(params):
784 """Validates that given hypervisor params are not global ones.
786 This will ensure that instances don't get customised versions of
790 used_globals = constants.HVC_GLOBALS.intersection(params)
792 msg = ("The following hypervisor parameters are global and cannot"
793 " be customized at instance level, please modify them at"
794 " cluster level: %s" % utils.CommaJoin(used_globals))
795 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
798 def _CheckNodeOnline(lu, node, msg=None):
799 """Ensure that a given node is online.
801 @param lu: the LU on behalf of which we make the check
802 @param node: the node to check
803 @param msg: if passed, should be a message to replace the default one
804 @raise errors.OpPrereqError: if the node is offline
808 msg = "Can't use offline node"
809 if lu.cfg.GetNodeInfo(node).offline:
810 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
813 def _CheckNodeNotDrained(lu, node):
814 """Ensure that a given node is not drained.
816 @param lu: the LU on behalf of which we make the check
817 @param node: the node to check
818 @raise errors.OpPrereqError: if the node is drained
821 if lu.cfg.GetNodeInfo(node).drained:
822 raise errors.OpPrereqError("Can't use drained node %s" % node,
826 def _CheckNodeVmCapable(lu, node):
827 """Ensure that a given node is vm capable.
829 @param lu: the LU on behalf of which we make the check
830 @param node: the node to check
831 @raise errors.OpPrereqError: if the node is not vm capable
834 if not lu.cfg.GetNodeInfo(node).vm_capable:
835 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
839 def _CheckNodeHasOS(lu, node, os_name, force_variant):
840 """Ensure that a node supports a given OS.
842 @param lu: the LU on behalf of which we make the check
843 @param node: the node to check
844 @param os_name: the OS to query about
845 @param force_variant: whether to ignore variant errors
846   @raise errors.OpPrereqError: if the node does not support the OS
849 result = lu.rpc.call_os_get(node, os_name)
850 result.Raise("OS '%s' not in supported OS list for node %s" %
852 prereq=True, ecode=errors.ECODE_INVAL)
853 if not force_variant:
854 _CheckOSVariant(result.payload, os_name)
857 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
858 """Ensure that a node has the given secondary ip.
860 @type lu: L{LogicalUnit}
861 @param lu: the LU on behalf of which we make the check
863 @param node: the node to check
864 @type secondary_ip: string
865 @param secondary_ip: the ip to check
866 @type prereq: boolean
867 @param prereq: whether to throw a prerequisite or an execute error
868 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
869 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
872 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
873 result.Raise("Failure checking secondary ip on node %s" % node,
874 prereq=prereq, ecode=errors.ECODE_ENVIRON)
875 if not result.payload:
876 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
877 " please fix and re-run this command" % secondary_ip)
879 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
881 raise errors.OpExecError(msg)
884 def _GetClusterDomainSecret():
885 """Reads the cluster domain secret.
888 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
892 def _CheckInstanceDown(lu, instance, reason):
893 """Ensure that an instance is not running."""
894 if instance.admin_up:
895 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
896 (instance.name, reason), errors.ECODE_STATE)
898 pnode = instance.primary_node
899 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
900 ins_l.Raise("Can't contact node %s for instance information" % pnode,
901 prereq=True, ecode=errors.ECODE_ENVIRON)
903 if instance.name in ins_l.payload:
904 raise errors.OpPrereqError("Instance %s is running, %s" %
905 (instance.name, reason), errors.ECODE_STATE)
908 def _ExpandItemName(fn, name, kind):
909 """Expand an item name.
911 @param fn: the function to use for expansion
912 @param name: requested item name
913 @param kind: text description ('Node' or 'Instance')
914 @return: the resolved (full) name
915 @raise errors.OpPrereqError: if the item is not found
919 if full_name is None:
920 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
925 def _ExpandNodeName(cfg, name):
926 """Wrapper over L{_ExpandItemName} for nodes."""
927 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
930 def _ExpandInstanceName(cfg, name):
931 """Wrapper over L{_ExpandItemName} for instance."""
932 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
935 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
936 memory, vcpus, nics, disk_template, disks,
937 bep, hvp, hypervisor_name, tags):
938 """Builds instance related env variables for hooks
940 This builds the hook environment from individual variables.
943 @param name: the name of the instance
944 @type primary_node: string
945 @param primary_node: the name of the instance's primary node
946 @type secondary_nodes: list
947 @param secondary_nodes: list of secondary nodes as strings
948 @type os_type: string
949 @param os_type: the name of the instance's OS
950 @type status: boolean
951 @param status: the should_run status of the instance
953 @param memory: the memory size of the instance
955 @param vcpus: the count of VCPUs the instance has
957 @param nics: list of tuples (ip, mac, mode, link) representing
958 the NICs the instance has
959 @type disk_template: string
960 @param disk_template: the disk template of the instance
962 @param disks: the list of (size, mode) pairs
964 @param bep: the backend parameters for the instance
966 @param hvp: the hypervisor parameters for the instance
967 @type hypervisor_name: string
968 @param hypervisor_name: the hypervisor for the instance
970 @param tags: list of instance tags as strings
972 @return: the hook environment for this instance
981 "INSTANCE_NAME": name,
982 "INSTANCE_PRIMARY": primary_node,
983 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
984 "INSTANCE_OS_TYPE": os_type,
985 "INSTANCE_STATUS": str_status,
986 "INSTANCE_MEMORY": memory,
987 "INSTANCE_VCPUS": vcpus,
988 "INSTANCE_DISK_TEMPLATE": disk_template,
989 "INSTANCE_HYPERVISOR": hypervisor_name,
993 nic_count = len(nics)
994 for idx, (ip, mac, mode, link) in enumerate(nics):
997 env["INSTANCE_NIC%d_IP" % idx] = ip
998 env["INSTANCE_NIC%d_MAC" % idx] = mac
999 env["INSTANCE_NIC%d_MODE" % idx] = mode
1000 env["INSTANCE_NIC%d_LINK" % idx] = link
1001 if mode == constants.NIC_MODE_BRIDGED:
1002 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1006 env["INSTANCE_NIC_COUNT"] = nic_count
1009 disk_count = len(disks)
1010 for idx, (size, mode) in enumerate(disks):
1011 env["INSTANCE_DISK%d_SIZE" % idx] = size
1012 env["INSTANCE_DISK%d_MODE" % idx] = mode
1016 env["INSTANCE_DISK_COUNT"] = disk_count
1021 env["INSTANCE_TAGS"] = " ".join(tags)
1023 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1024 for key, value in source.items():
1025 env["INSTANCE_%s_%s" % (kind, key)] = value
1030 def _NICListToTuple(lu, nics):
1031 """Build a list of nic information tuples.
1033 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1034 value in LUInstanceQueryData.
1036 @type lu: L{LogicalUnit}
1037 @param lu: the logical unit on whose behalf we execute
1038 @type nics: list of L{objects.NIC}
1039 @param nics: list of nics to convert to hooks tuples
1043 cluster = lu.cfg.GetClusterInfo()
1047 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1048 mode = filled_params[constants.NIC_MODE]
1049 link = filled_params[constants.NIC_LINK]
1050 hooks_nics.append((ip, mac, mode, link))
1054 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1055 """Builds instance related env variables for hooks from an object.
1057 @type lu: L{LogicalUnit}
1058 @param lu: the logical unit on whose behalf we execute
1059 @type instance: L{objects.Instance}
1060 @param instance: the instance for which we should build the
1062 @type override: dict
1063 @param override: dictionary with key/values that will override
1066 @return: the hook environment dictionary
1069 cluster = lu.cfg.GetClusterInfo()
1070 bep = cluster.FillBE(instance)
1071 hvp = cluster.FillHV(instance)
1073 "name": instance.name,
1074 "primary_node": instance.primary_node,
1075 "secondary_nodes": instance.secondary_nodes,
1076 "os_type": instance.os,
1077 "status": instance.admin_up,
1078 "memory": bep[constants.BE_MEMORY],
1079 "vcpus": bep[constants.BE_VCPUS],
1080 "nics": _NICListToTuple(lu, instance.nics),
1081 "disk_template": instance.disk_template,
1082 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1085 "hypervisor_name": instance.hypervisor,
1086 "tags": instance.tags,
1089 args.update(override)
1090 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
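# Illustrative sketch, not part of the original module: the "override"
# dictionary replaces individual arguments before the environment is built,
# e.g. to advertise a new name in a hypothetical rename hook.
def _ExampleRenameHookEnv(lu, instance, new_name):
  """Sketch: hook environment reporting C{new_name} as the instance name."""
  return _BuildInstanceHookEnvByObject(lu, instance,
                                       override={"name": new_name})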
1093 def _AdjustCandidatePool(lu, exceptions):
1094 """Adjust the candidate pool after node operations.
1097 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1099 lu.LogInfo("Promoted nodes to master candidate role: %s",
1100 utils.CommaJoin(node.name for node in mod_list))
1101 for name in mod_list:
1102 lu.context.ReaddNode(name)
1103 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1105 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1109 def _DecideSelfPromotion(lu, exceptions=None):
1110 """Decide whether I should promote myself as a master candidate.
1113 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1114 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1115   # the new node will increase mc_max by one, so:
1116 mc_should = min(mc_should + 1, cp_size)
1117 return mc_now < mc_should
1120 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1121 """Check that the brigdes needed by a list of nics exist.
1124 cluster = lu.cfg.GetClusterInfo()
1125 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1126 brlist = [params[constants.NIC_LINK] for params in paramslist
1127 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1129 result = lu.rpc.call_bridges_exist(target_node, brlist)
1130 result.Raise("Error checking bridges on destination node '%s'" %
1131 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1134 def _CheckInstanceBridgesExist(lu, instance, node=None):
1135 """Check that the brigdes needed by an instance exist.
1139 node = instance.primary_node
1140 _CheckNicsBridgesExist(lu, instance.nics, node)
1143 def _CheckOSVariant(os_obj, name):
1144 """Check whether an OS name conforms to the os variants specification.
1146 @type os_obj: L{objects.OS}
1147 @param os_obj: OS object to check
1149 @param name: OS name passed by the user, to check for validity
1152 variant = objects.OS.GetVariant(name)
1153 if not os_obj.supported_variants:
1155 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1156 " passed)" % (os_obj.name, variant),
1160 raise errors.OpPrereqError("OS name must include a variant",
1163 if variant not in os_obj.supported_variants:
1164 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1167 def _GetNodeInstancesInner(cfg, fn):
1168 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1171 def _GetNodeInstances(cfg, node_name):
1172 """Returns a list of all primary and secondary instances on a node.
1176 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1179 def _GetNodePrimaryInstances(cfg, node_name):
1180 """Returns primary instances on a node.
1183 return _GetNodeInstancesInner(cfg,
1184 lambda inst: node_name == inst.primary_node)
1187 def _GetNodeSecondaryInstances(cfg, node_name):
1188 """Returns secondary instances on a node.
1191 return _GetNodeInstancesInner(cfg,
1192 lambda inst: node_name in inst.secondary_nodes)
1195 def _GetStorageTypeArgs(cfg, storage_type):
1196 """Returns the arguments for a storage type.
1199 # Special case for file storage
1200 if storage_type == constants.ST_FILE:
1201 # storage.FileStorage wants a list of storage directories
1202 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1207 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1210 for dev in instance.disks:
1211 cfg.SetDiskID(dev, node_name)
1213 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1214 result.Raise("Failed to get disk status from node %s" % node_name,
1215 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1217 for idx, bdev_status in enumerate(result.payload):
1218 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1224 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1225 """Check the sanity of iallocator and node arguments and use the
1226 cluster-wide iallocator if appropriate.
1228 Check that at most one of (iallocator, node) is specified. If none is
1229 specified, then the LU's opcode's iallocator slot is filled with the
1230 cluster-wide default iallocator.
1232 @type iallocator_slot: string
1233 @param iallocator_slot: the name of the opcode iallocator slot
1234 @type node_slot: string
1235 @param node_slot: the name of the opcode target node slot
1238 node = getattr(lu.op, node_slot, None)
1239 iallocator = getattr(lu.op, iallocator_slot, None)
1241 if node is not None and iallocator is not None:
1242 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1244 elif node is None and iallocator is None:
1245 default_iallocator = lu.cfg.GetDefaultIAllocator()
1246 if default_iallocator:
1247 setattr(lu.op, iallocator_slot, default_iallocator)
1249 raise errors.OpPrereqError("No iallocator or node given and no"
1250 " cluster-wide default iallocator found;"
1251 " please specify either an iallocator or a"
1252 " node, or set a cluster-wide default"
1256 def _GetDefaultIAllocator(cfg, iallocator):
1257 """Decides on which iallocator to use.
1259 @type cfg: L{config.ConfigWriter}
1260 @param cfg: Cluster configuration object
1261 @type iallocator: string or None
1262 @param iallocator: Iallocator specified in opcode
1264 @return: Iallocator name
1268 # Use default iallocator
1269 iallocator = cfg.GetDefaultIAllocator()
1272 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1273 " opcode nor as a cluster-wide default",
1279 class LUClusterPostInit(LogicalUnit):
1280 """Logical unit for running hooks after cluster initialization.
1283 HPATH = "cluster-init"
1284 HTYPE = constants.HTYPE_CLUSTER
1286 def BuildHooksEnv(self):
1291 "OP_TARGET": self.cfg.GetClusterName(),
1294 def BuildHooksNodes(self):
1295 """Build hooks nodes.
1298 return ([], [self.cfg.GetMasterNode()])
1300 def Exec(self, feedback_fn):
1307 class LUClusterDestroy(LogicalUnit):
1308 """Logical unit for destroying the cluster.
1311 HPATH = "cluster-destroy"
1312 HTYPE = constants.HTYPE_CLUSTER
1314 def BuildHooksEnv(self):
1319 "OP_TARGET": self.cfg.GetClusterName(),
1322 def BuildHooksNodes(self):
1323 """Build hooks nodes.
1328 def CheckPrereq(self):
1329 """Check prerequisites.
1331 This checks whether the cluster is empty.
1333 Any errors are signaled by raising errors.OpPrereqError.
1336 master = self.cfg.GetMasterNode()
1338 nodelist = self.cfg.GetNodeList()
1339 if len(nodelist) != 1 or nodelist[0] != master:
1340 raise errors.OpPrereqError("There are still %d node(s) in"
1341 " this cluster." % (len(nodelist) - 1),
1343 instancelist = self.cfg.GetInstanceList()
1345 raise errors.OpPrereqError("There are still %d instance(s) in"
1346 " this cluster." % len(instancelist),
1349 def Exec(self, feedback_fn):
1350 """Destroys the cluster.
1353 master = self.cfg.GetMasterNode()
1355 # Run post hooks on master node before it's removed
1356 _RunPostHook(self, master)
1358 result = self.rpc.call_node_deactivate_master_ip(master)
1359 result.Raise("Could not disable the master role")
1364 def _VerifyCertificate(filename):
1365 """Verifies a certificate for L{LUClusterVerifyConfig}.
1367 @type filename: string
1368 @param filename: Path to PEM file
1372 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1373 utils.ReadFile(filename))
1374 except Exception, err: # pylint: disable=W0703
1375 return (LUClusterVerifyConfig.ETYPE_ERROR,
1376 "Failed to load X509 certificate %s: %s" % (filename, err))
1379 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1380 constants.SSL_CERT_EXPIRATION_ERROR)
1383 fnamemsg = "While verifying %s: %s" % (filename, msg)
1388 return (None, fnamemsg)
1389 elif errcode == utils.CERT_WARNING:
1390 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1391 elif errcode == utils.CERT_ERROR:
1392 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1394 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1397 def _GetAllHypervisorParameters(cluster, instances):
1398 """Compute the set of all hypervisor parameters.
1400 @type cluster: L{objects.Cluster}
1401 @param cluster: the cluster object
1402   @type instances: list of L{objects.Instance}
1403 @param instances: additional instances from which to obtain parameters
1404 @rtype: list of (origin, hypervisor, parameters)
1405 @return: a list with all parameters found, indicating the hypervisor they
1406 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1411 for hv_name in cluster.enabled_hypervisors:
1412 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1414 for os_name, os_hvp in cluster.os_hvp.items():
1415 for hv_name, hv_params in os_hvp.items():
1417 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1418 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1420 # TODO: collapse identical parameter values in a single one
1421 for instance in instances:
1422 if instance.hvparams:
1423 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1424 cluster.FillHV(instance)))
1429 class _VerifyErrors(object):
1430 """Mix-in for cluster/group verify LUs.
1432 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1433 self.op and self._feedback_fn to be available.)
1436 TCLUSTER = "cluster"
1438 TINSTANCE = "instance"
1440 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1441 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1442 ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1443 ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1444 ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1445 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1446 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1447 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1448 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1449 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1450 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1451 EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1452 ENODEDRBD = (TNODE, "ENODEDRBD")
1453 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1454 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1455 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1456 ENODEHV = (TNODE, "ENODEHV")
1457 ENODELVM = (TNODE, "ENODELVM")
1458 ENODEN1 = (TNODE, "ENODEN1")
1459 ENODENET = (TNODE, "ENODENET")
1460 ENODEOS = (TNODE, "ENODEOS")
1461 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1462 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1463 ENODERPC = (TNODE, "ENODERPC")
1464 ENODESSH = (TNODE, "ENODESSH")
1465 ENODEVERSION = (TNODE, "ENODEVERSION")
1466 ENODESETUP = (TNODE, "ENODESETUP")
1467 ENODETIME = (TNODE, "ENODETIME")
1468 ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1470 ETYPE_FIELD = "code"
1471 ETYPE_ERROR = "ERROR"
1472 ETYPE_WARNING = "WARNING"
1474 def _Error(self, ecode, item, msg, *args, **kwargs):
1475 """Format an error message.
1477 Based on the opcode's error_codes parameter, either format a
1478 parseable error code, or a simpler error string.
1480 This must be called only from Exec and functions called from Exec.
1483 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1485 # first complete the msg
1488 # then format the whole message
1489 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1490 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1496 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1497 # and finally report it via the feedback_fn
1498 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1500 def _ErrorIf(self, cond, *args, **kwargs):
1501 """Log an error message if the passed condition is True.
1505 or self.op.debug_simulate_errors) # pylint: disable=E1101
1507 self._Error(*args, **kwargs)
1508 # do not mark the operation as failed for WARN cases only
1509 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1510 self.bad = self.bad or cond
1513 class LUClusterVerify(NoHooksLU):
1514 """Submits all jobs necessary to verify the cluster.
1519 def ExpandNames(self):
1520 self.needed_locks = {}
1522 def Exec(self, feedback_fn):
1525 if self.op.group_name:
1526 groups = [self.op.group_name]
1527 depends_fn = lambda: None
1529 groups = self.cfg.GetNodeGroupList()
1531 # Verify global configuration
1532 jobs.append([opcodes.OpClusterVerifyConfig()])
1534 # Always depend on global verification
1535 depends_fn = lambda: [(-len(jobs), [])]
1537 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1538 depends=depends_fn())]
1539 for group in groups)
1541 # Fix up all parameters
1542 for op in itertools.chain(*jobs): # pylint: disable=W0142
1543 op.debug_simulate_errors = self.op.debug_simulate_errors
1544 op.verbose = self.op.verbose
1545 op.error_codes = self.op.error_codes
1547 op.skip_checks = self.op.skip_checks
1548 except AttributeError:
1549 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1551 return ResultWithJobs(jobs)
1554 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1555 """Verifies the cluster config.
1560 def _VerifyHVP(self, hvp_data):
1561 """Verifies locally the syntax of the hypervisor parameters.
1564 for item, hv_name, hv_params in hvp_data:
1565 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1568 hv_class = hypervisor.GetHypervisor(hv_name)
1569 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1570 hv_class.CheckParameterSyntax(hv_params)
1571 except errors.GenericError, err:
1572 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1574 def ExpandNames(self):
1575 # Information can be safely retrieved as the BGL is acquired in exclusive
1577 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1578 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1579 self.all_node_info = self.cfg.GetAllNodesInfo()
1580 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1581 self.needed_locks = {}
1583 def Exec(self, feedback_fn):
1584 """Verify integrity of cluster, performing various test on nodes.
1588 self._feedback_fn = feedback_fn
1590 feedback_fn("* Verifying cluster config")
1592 for msg in self.cfg.VerifyConfig():
1593 self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1595 feedback_fn("* Verifying cluster certificate files")
1597 for cert_filename in constants.ALL_CERT_FILES:
1598 (errcode, msg) = _VerifyCertificate(cert_filename)
1599 self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1601 feedback_fn("* Verifying hypervisor parameters")
1603 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1604 self.all_inst_info.values()))
1606 feedback_fn("* Verifying all nodes belong to an existing group")
1608 # We do this verification here because, should this bogus circumstance
1609 # occur, it would never be caught by VerifyGroup, which only acts on
1610 # nodes/instances reachable from existing node groups.
1612 dangling_nodes = set(node.name for node in self.all_node_info.values()
1613 if node.group not in self.all_group_info)
1615 dangling_instances = {}
1616 no_node_instances = []
1618 for inst in self.all_inst_info.values():
1619 if inst.primary_node in dangling_nodes:
1620 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1621 elif inst.primary_node not in self.all_node_info:
1622 no_node_instances.append(inst.name)
1627 utils.CommaJoin(dangling_instances.get(node.name,
1629 for node in dangling_nodes]
1631 self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1632 "the following nodes (and their instances) belong to a non"
1633 " existing group: %s", utils.CommaJoin(pretty_dangling))
1635 self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1636 "the following instances have a non-existing primary-node:"
1637 " %s", utils.CommaJoin(no_node_instances))
1642 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1643 """Verifies the status of a node group.
1646 HPATH = "cluster-verify"
1647 HTYPE = constants.HTYPE_CLUSTER
1650 _HOOKS_INDENT_RE = re.compile("^", re.M)
1652 class NodeImage(object):
1653 """A class representing the logical and physical status of a node.
1656 @ivar name: the node name to which this object refers
1657 @ivar volumes: a structure as returned from
1658 L{ganeti.backend.GetVolumeList} (runtime)
1659 @ivar instances: a list of running instances (runtime)
1660 @ivar pinst: list of configured primary instances (config)
1661 @ivar sinst: list of configured secondary instances (config)
1662 @ivar sbp: dictionary of {primary-node: list of instances} for all
1663 instances for which this node is secondary (config)
1664 @ivar mfree: free memory, as reported by hypervisor (runtime)
1665 @ivar dfree: free disk, as reported by the node (runtime)
1666 @ivar offline: the offline status (config)
1667 @type rpc_fail: boolean
1668     @ivar rpc_fail: whether the RPC verify call was successful (overall,
1669 not whether the individual keys were correct) (runtime)
1670 @type lvm_fail: boolean
1671 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1672 @type hyp_fail: boolean
1673 @ivar hyp_fail: whether the RPC call didn't return the instance list
1674 @type ghost: boolean
1675 @ivar ghost: whether this is a known node or not (config)
1676 @type os_fail: boolean
1677 @ivar os_fail: whether the RPC call didn't return valid OS data
1679 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1680 @type vm_capable: boolean
1681 @ivar vm_capable: whether the node can host instances
1684 def __init__(self, offline=False, name=None, vm_capable=True):
1693 self.offline = offline
1694 self.vm_capable = vm_capable
1695 self.rpc_fail = False
1696 self.lvm_fail = False
1697 self.hyp_fail = False
1699 self.os_fail = False
1702 def ExpandNames(self):
1703 # This raises errors.OpPrereqError on its own:
1704 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1706 # Get instances in node group; this is unsafe and needs verification later
1707 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1709 self.needed_locks = {
1710 locking.LEVEL_INSTANCE: inst_names,
1711 locking.LEVEL_NODEGROUP: [self.group_uuid],
1712 locking.LEVEL_NODE: [],
1715 self.share_locks = _ShareAll()
1717 def DeclareLocks(self, level):
1718 if level == locking.LEVEL_NODE:
1719 # Get members of node group; this is unsafe and needs verification later
1720 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1722 all_inst_info = self.cfg.GetAllInstancesInfo()
1724 # In Exec(), we warn about mirrored instances that have primary and
1725 # secondary living in separate node groups. To fully verify that
1726 # volumes for these instances are healthy, we will need to do an
1727 # extra call to their secondaries. We ensure here those nodes will
1729 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1730 # Important: access only the instances whose lock is owned
1731 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1732 nodes.update(all_inst_info[inst].secondary_nodes)
1734 self.needed_locks[locking.LEVEL_NODE] = nodes
1736 def CheckPrereq(self):
1737 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1738 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1740 group_nodes = set(self.group_info.members)
1741 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1744 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1746 unlocked_instances = \
1747 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1750 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1751 utils.CommaJoin(unlocked_nodes))
1753 if unlocked_instances:
1754 raise errors.OpPrereqError("Missing lock for instances: %s" %
1755 utils.CommaJoin(unlocked_instances))
1757 self.all_node_info = self.cfg.GetAllNodesInfo()
1758 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1760 self.my_node_names = utils.NiceSort(group_nodes)
1761 self.my_inst_names = utils.NiceSort(group_instances)
1763 self.my_node_info = dict((name, self.all_node_info[name])
1764 for name in self.my_node_names)
1766 self.my_inst_info = dict((name, self.all_inst_info[name])
1767 for name in self.my_inst_names)
1769 # We detect here the nodes that will need the extra RPC calls for verifying
1770 # split LV volumes; they should be locked.
1771 extra_lv_nodes = set()
1773 for inst in self.my_inst_info.values():
1774 if inst.disk_template in constants.DTS_INT_MIRROR:
1775 group = self.my_node_info[inst.primary_node].group
1776 for nname in inst.secondary_nodes:
1777 if self.all_node_info[nname].group != group:
1778 extra_lv_nodes.add(nname)
1780 unlocked_lv_nodes = \
1781 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1783 if unlocked_lv_nodes:
1784 raise errors.OpPrereqError("these nodes could be locked: %s" %
1785 utils.CommaJoin(unlocked_lv_nodes))
1786 self.extra_lv_nodes = list(extra_lv_nodes)
1788 def _VerifyNode(self, ninfo, nresult):
1789 """Perform some basic validation on data returned from a node.
1791 - check the result data structure is well formed and has all the
1793 - check ganeti version
1795 @type ninfo: L{objects.Node}
1796 @param ninfo: the node to check
1797 @param nresult: the results from the node
1799 @return: whether overall this call was successful (and we can expect
1800         reasonable values in the response)
1804 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1806 # main result, nresult should be a non-empty dict
1807 test = not nresult or not isinstance(nresult, dict)
1808 _ErrorIf(test, self.ENODERPC, node,
1809 "unable to verify node: no data returned")
1813 # compares ganeti version
1814 local_version = constants.PROTOCOL_VERSION
1815 remote_version = nresult.get("version", None)
1816 test = not (remote_version and
1817 isinstance(remote_version, (list, tuple)) and
1818 len(remote_version) == 2)
1819 _ErrorIf(test, self.ENODERPC, node,
1820 "connection to node returned invalid data")
1824 test = local_version != remote_version[0]
1825 _ErrorIf(test, self.ENODEVERSION, node,
1826 "incompatible protocol versions: master %s,"
1827 " node %s", local_version, remote_version[0])
1831 # node seems compatible, we can actually try to look into its results
1833 # full package version
1834 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1835 self.ENODEVERSION, node,
1836 "software version mismatch: master %s, node %s",
1837 constants.RELEASE_VERSION, remote_version[1],
1838 code=self.ETYPE_WARNING)
1840 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1841 if ninfo.vm_capable and isinstance(hyp_result, dict):
1842 for hv_name, hv_result in hyp_result.iteritems():
1843 test = hv_result is not None
1844 _ErrorIf(test, self.ENODEHV, node,
1845 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1847 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1848 if ninfo.vm_capable and isinstance(hvp_result, list):
1849 for item, hv_name, hv_result in hvp_result:
1850 _ErrorIf(True, self.ENODEHV, node,
1851 "hypervisor %s parameter verify failure (source %s): %s",
1852 hv_name, item, hv_result)
1854 test = nresult.get(constants.NV_NODESETUP,
1855 ["Missing NODESETUP results"])
1856 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1861 def _VerifyNodeTime(self, ninfo, nresult,
1862 nvinfo_starttime, nvinfo_endtime):
1863 """Check the node time.
1865 @type ninfo: L{objects.Node}
1866 @param ninfo: the node to check
1867 @param nresult: the remote results for the node
1868 @param nvinfo_starttime: the start time of the RPC call
1869 @param nvinfo_endtime: the end time of the RPC call
1873 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1875 ntime = nresult.get(constants.NV_TIME, None)
1877 ntime_merged = utils.MergeTime(ntime)
1878 except (ValueError, TypeError):
1879 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1882 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1883 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1884 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1885 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1889 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1890 "Node time diverges by at least %s from master node time",
1893 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1894 """Check the node LVM results.
1896 @type ninfo: L{objects.Node}
1897 @param ninfo: the node to check
1898 @param nresult: the remote results for the node
1899 @param vg_name: the configured VG name
1906 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1908 # checks vg existence and size > 20G
1909 vglist = nresult.get(constants.NV_VGLIST, None)
1911 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1913 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1914 constants.MIN_VG_SIZE)
1915 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1918 pvlist = nresult.get(constants.NV_PVLIST, None)
1919 test = pvlist is None
1920 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1922 # check that ':' is not present in PV names, since it's a
1923 # special character for lvcreate (denotes the range of PEs to
1925 for _, pvname, owner_vg in pvlist:
1926 test = ":" in pvname
1927 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1928 " '%s' of VG '%s'", pvname, owner_vg)
1930 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1931 """Check the node bridges.
1933 @type ninfo: L{objects.Node}
1934 @param ninfo: the node to check
1935 @param nresult: the remote results for the node
1936 @param bridges: the expected list of bridges
1943 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1945 missing = nresult.get(constants.NV_BRIDGES, None)
1946 test = not isinstance(missing, list)
1947 _ErrorIf(test, self.ENODENET, node,
1948 "did not return valid bridge information")
1950 _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1951 utils.CommaJoin(sorted(missing)))
1953 def _VerifyNodeNetwork(self, ninfo, nresult):
1954 """Check the node network connectivity results.
1956 @type ninfo: L{objects.Node}
1957 @param ninfo: the node to check
1958 @param nresult: the remote results for the node
1962 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1964 test = constants.NV_NODELIST not in nresult
1965 _ErrorIf(test, self.ENODESSH, node,
1966 "node hasn't returned node ssh connectivity data")
1968 if nresult[constants.NV_NODELIST]:
1969 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1970 _ErrorIf(True, self.ENODESSH, node,
1971 "ssh communication with node '%s': %s", a_node, a_msg)
1973 test = constants.NV_NODENETTEST not in nresult
1974 _ErrorIf(test, self.ENODENET, node,
1975 "node hasn't returned node tcp connectivity data")
1977 if nresult[constants.NV_NODENETTEST]:
1978 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1980 _ErrorIf(True, self.ENODENET, node,
1981 "tcp communication with node '%s': %s",
1982 anode, nresult[constants.NV_NODENETTEST][anode])
1984 test = constants.NV_MASTERIP not in nresult
1985 _ErrorIf(test, self.ENODENET, node,
1986 "node hasn't returned node master IP reachability data")
1988 if not nresult[constants.NV_MASTERIP]:
1989 if node == self.master_node:
1990 msg = "the master node cannot reach the master IP (not configured?)"
1992 msg = "cannot reach the master IP"
1993 _ErrorIf(True, self.ENODENET, node, msg)
1995 def _VerifyInstance(self, instance, instanceconfig, node_image,
1997 """Verify an instance.
1999     This function checks whether the required block devices are
2000     available on the instance's nodes.
2003 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2004 node_current = instanceconfig.primary_node
2006 node_vol_should = {}
2007 instanceconfig.MapLVsByNode(node_vol_should)
2009 for node in node_vol_should:
2010 n_img = node_image[node]
2011 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2012 # ignore missing volumes on offline or broken nodes
2014 for volume in node_vol_should[node]:
2015 test = volume not in n_img.volumes
2016 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
2017 "volume %s missing on node %s", volume, node)
2019 if instanceconfig.admin_up:
2020 pri_img = node_image[node_current]
2021 test = instance not in pri_img.instances and not pri_img.offline
2022 _ErrorIf(test, self.EINSTANCEDOWN, instance,
2023 "instance not running on its primary node %s",
2026 diskdata = [(nname, success, status, idx)
2027 for (nname, disks) in diskstatus.items()
2028 for idx, (success, status) in enumerate(disks)]
2030 for nname, success, bdev_status, idx in diskdata:
2031 # the 'ghost node' construction in Exec() ensures that we have a
2033 snode = node_image[nname]
2034 bad_snode = snode.ghost or snode.offline
2035 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2036 self.EINSTANCEFAULTYDISK, instance,
2037 "couldn't retrieve status for disk/%s on %s: %s",
2038 idx, nname, bdev_status)
2039 _ErrorIf((instanceconfig.admin_up and success and
2040 bdev_status.ldisk_status == constants.LDS_FAULTY),
2041 self.EINSTANCEFAULTYDISK, instance,
2042 "disk/%s on %s is faulty", idx, nname)
2044 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2045 """Verify if there are any unknown volumes in the cluster.
2047 The .os, .swap and backup volumes are ignored. All other volumes are
2048 reported as unknown.
2050 @type reserved: L{ganeti.utils.FieldSet}
2051 @param reserved: a FieldSet of reserved volume names
2054 for node, n_img in node_image.items():
2055 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2056 # skip non-healthy nodes
2058 for volume in n_img.volumes:
2059 test = ((node not in node_vol_should or
2060 volume not in node_vol_should[node]) and
2061 not reserved.Matches(volume))
2062 self._ErrorIf(test, self.ENODEORPHANLV, node,
2063 "volume %s is unknown", volume)
2065 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2066 """Verify N+1 Memory Resilience.
2068 Check that if one single node dies we can still start all the
2069 instances it was primary for.
2072 cluster_info = self.cfg.GetClusterInfo()
2073 for node, n_img in node_image.items():
2074       # This code checks that every node which is now listed as
2075       # secondary has enough memory to host all instances it is
2076       # supposed to host, should a single other node in the cluster fail.
2077 # FIXME: not ready for failover to an arbitrary node
2078 # FIXME: does not support file-backed instances
2079 # WARNING: we currently take into account down instances as well
2080 # as up ones, considering that even if they're down someone
2081 # might want to start them even in the event of a node failure.
2083 # we're skipping offline nodes from the N+1 warning, since
2084       # most likely we don't have good memory information from them;
2085 # we already list instances living on such nodes, and that's
2088 for prinode, instances in n_img.sbp.items():
2090 for instance in instances:
2091 bep = cluster_info.FillBE(instance_cfg[instance])
2092 if bep[constants.BE_AUTO_BALANCE]:
2093 needed_mem += bep[constants.BE_MEMORY]
2094 test = n_img.mfree < needed_mem
2095 self._ErrorIf(test, self.ENODEN1, node,
2096 "not enough memory to accomodate instance failovers"
2097 " should node %s fail (%dMiB needed, %dMiB available)",
2098 prinode, needed_mem, n_img.mfree)
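  # Illustrative sketch (not part of this LU) of the per-primary memory demand
  # computed above: for each primary node whose instances this node holds as
  # secondaries, sum the memory of auto-balanced instances; the node must have
  # at least that much free memory. The names are hypothetical; 'sbp' maps
  # primary node -> instances this node is secondary for, and 'fill_be_fn'
  # stands for a callable such as
  # lambda name: cluster_info.FillBE(instance_cfg[name]).
  #
  #   def _NeededMemPerPrimary(sbp, fill_be_fn):
  #     """Return a dict of primary node -> memory needed if that node fails."""
  #     needed = {}
  #     for prinode, instances in sbp.items():
  #       needed[prinode] = sum(fill_be_fn(inst)[constants.BE_MEMORY]
  #                             for inst in instances
  #                             if fill_be_fn(inst)[constants.BE_AUTO_BALANCE])
  #     return needed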
2101 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2102 (files_all, files_opt, files_mc, files_vm)):
2103 """Verifies file checksums collected from all nodes.
2105 @param errorif: Callback for reporting errors
2106 @param nodeinfo: List of L{objects.Node} objects
2107 @param master_node: Name of master node
2108 @param all_nvinfo: RPC results
2111 # Define functions determining which nodes to consider for a file
2114 (files_mc, lambda node: (node.master_candidate or
2115 node.name == master_node)),
2116 (files_vm, lambda node: node.vm_capable),
2119 # Build mapping from filename to list of nodes which should have the file
2121 for (files, fn) in files2nodefn:
2123 filenodes = nodeinfo
2125 filenodes = filter(fn, nodeinfo)
2126 nodefiles.update((filename,
2127 frozenset(map(operator.attrgetter("name"), filenodes)))
2128 for filename in files)
2130 assert set(nodefiles) == (files_all | files_mc | files_vm)
2132 fileinfo = dict((filename, {}) for filename in nodefiles)
2133 ignore_nodes = set()
2135 for node in nodeinfo:
2137 ignore_nodes.add(node.name)
2140 nresult = all_nvinfo[node.name]
2142 if nresult.fail_msg or not nresult.payload:
2145 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2147 test = not (node_files and isinstance(node_files, dict))
2148 errorif(test, cls.ENODEFILECHECK, node.name,
2149 "Node did not return file checksum data")
2151 ignore_nodes.add(node.name)
2154 # Build per-checksum mapping from filename to nodes having it
2155 for (filename, checksum) in node_files.items():
2156 assert filename in nodefiles
2157 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2159 for (filename, checksums) in fileinfo.items():
2160 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2162 # Nodes having the file
2163 with_file = frozenset(node_name
2164 for nodes in fileinfo[filename].values()
2165 for node_name in nodes) - ignore_nodes
2167 expected_nodes = nodefiles[filename] - ignore_nodes
2169 # Nodes missing file
2170 missing_file = expected_nodes - with_file
2172 if filename in files_opt:
2174 errorif(missing_file and missing_file != expected_nodes,
2175 cls.ECLUSTERFILECHECK, None,
2176 "File %s is optional, but it must exist on all or no"
2177 " nodes (not found on %s)",
2178 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2180 # Non-optional files
2181 errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2182 "File %s is missing from node(s) %s", filename,
2183 utils.CommaJoin(utils.NiceSort(missing_file)))
2185 # Warn if a node has a file it shouldn't
2186 unexpected = with_file - expected_nodes
2188 cls.ECLUSTERFILECHECK, None,
2189 "File %s should not exist on node(s) %s",
2190 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2192 # See if there are multiple versions of the file
2193 test = len(checksums) > 1
2195 variants = ["variant %s on %s" %
2196 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2197 for (idx, (checksum, nodes)) in
2198 enumerate(sorted(checksums.items()))]
2202 errorif(test, cls.ECLUSTERFILECHECK, None,
2203 "File %s found with %s different checksums (%s)",
2204 filename, len(checksums), "; ".join(variants))
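  # Illustrative sketch (not part of _VerifyFiles): detecting divergent file
  # contents from the per-file mapping built above, i.e. a dict of
  # checksum -> set(node names). More than one key means the file differs
  # between nodes. The helper name is hypothetical.
  #
  #   def _DescribeVariants(checksums):
  #     """Return a human-readable description of each checksum variant."""
  #     return ["variant %s on %s" %
  #             (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
  #             for (idx, (_, nodes)) in enumerate(sorted(checksums.items()))]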
2206 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2208 """Verifies and the node DRBD status.
2210 @type ninfo: L{objects.Node}
2211 @param ninfo: the node to check
2212 @param nresult: the remote results for the node
2213 @param instanceinfo: the dict of instances
2214 @param drbd_helper: the configured DRBD usermode helper
2215 @param drbd_map: the DRBD map as returned by
2216 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2220 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2223 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2224       test = (helper_result is None)
2225 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2226 "no drbd usermode helper returned")
2228 status, payload = helper_result
2230 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2231 "drbd usermode helper check unsuccessful: %s", payload)
2232 test = status and (payload != drbd_helper)
2233 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2234 "wrong drbd usermode helper: %s", payload)
2236 # compute the DRBD minors
2238 for minor, instance in drbd_map[node].items():
2239 test = instance not in instanceinfo
2240 _ErrorIf(test, self.ECLUSTERCFG, None,
2241 "ghost instance '%s' in temporary DRBD map", instance)
2242 # ghost instance should not be running, but otherwise we
2243 # don't give double warnings (both ghost instance and
2244 # unallocated minor in use)
2246 node_drbd[minor] = (instance, False)
2248 instance = instanceinfo[instance]
2249 node_drbd[minor] = (instance.name, instance.admin_up)
2251 # and now check them
2252 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2253 test = not isinstance(used_minors, (tuple, list))
2254 _ErrorIf(test, self.ENODEDRBD, node,
2255 "cannot parse drbd status file: %s", str(used_minors))
2257 # we cannot check drbd status
2260 for minor, (iname, must_exist) in node_drbd.items():
2261 test = minor not in used_minors and must_exist
2262 _ErrorIf(test, self.ENODEDRBD, node,
2263 "drbd minor %d of instance %s is not active", minor, iname)
2264 for minor in used_minors:
2265 test = minor not in node_drbd
2266 _ErrorIf(test, self.ENODEDRBD, node,
2267 "unallocated drbd minor %d is in use", minor)
2269 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2270 """Builds the node OS structures.
2272 @type ninfo: L{objects.Node}
2273 @param ninfo: the node to check
2274 @param nresult: the remote results for the node
2275 @param nimg: the node image object
2279 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2281 remote_os = nresult.get(constants.NV_OSLIST, None)
2282 test = (not isinstance(remote_os, list) or
2283 not compat.all(isinstance(v, list) and len(v) == 7
2284 for v in remote_os))
2286 _ErrorIf(test, self.ENODEOS, node,
2287 "node hasn't returned valid OS data")
2296 for (name, os_path, status, diagnose,
2297 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2299 if name not in os_dict:
2302 # parameters is a list of lists instead of list of tuples due to
2303 # JSON lacking a real tuple type, fix it:
2304 parameters = [tuple(v) for v in parameters]
2305 os_dict[name].append((os_path, status, diagnose,
2306 set(variants), set(parameters), set(api_ver)))
2308 nimg.oslist = os_dict
2310 def _VerifyNodeOS(self, ninfo, nimg, base):
2311 """Verifies the node OS list.
2313 @type ninfo: L{objects.Node}
2314 @param ninfo: the node to check
2315 @param nimg: the node image object
2316 @param base: the 'template' node we match against (e.g. from the master)
2320 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2322 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2324 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2325 for os_name, os_data in nimg.oslist.items():
2326 assert os_data, "Empty OS status for OS %s?!" % os_name
2327 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2328 _ErrorIf(not f_status, self.ENODEOS, node,
2329 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2330 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2331 "OS '%s' has multiple entries (first one shadows the rest): %s",
2332 os_name, utils.CommaJoin([v[0] for v in os_data]))
2333 # comparisons with the 'base' image
2334 test = os_name not in base.oslist
2335 _ErrorIf(test, self.ENODEOS, node,
2336 "Extra OS %s not present on reference node (%s)",
2340 assert base.oslist[os_name], "Base node has empty OS status?"
2341 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2343 # base OS is invalid, skipping
2345 for kind, a, b in [("API version", f_api, b_api),
2346 ("variants list", f_var, b_var),
2347 ("parameters", beautify_params(f_param),
2348 beautify_params(b_param))]:
2349 _ErrorIf(a != b, self.ENODEOS, node,
2350 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2351 kind, os_name, base.name,
2352 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2354 # check any missing OSes
2355 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2356 _ErrorIf(missing, self.ENODEOS, node,
2357 "OSes present on reference node %s but missing on this node: %s",
2358 base.name, utils.CommaJoin(missing))
2360 def _VerifyOob(self, ninfo, nresult):
2361 """Verifies out of band functionality of a node.
2363 @type ninfo: L{objects.Node}
2364 @param ninfo: the node to check
2365 @param nresult: the remote results for the node
2369 # We just have to verify the paths on master and/or master candidates
2370 # as the oob helper is invoked on the master
2371 if ((ninfo.master_candidate or ninfo.master_capable) and
2372 constants.NV_OOB_PATHS in nresult):
2373 for path_result in nresult[constants.NV_OOB_PATHS]:
2374 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2376 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2377 """Verifies and updates the node volume data.
2379 This function will update a L{NodeImage}'s internal structures
2380 with data from the remote call.
2382 @type ninfo: L{objects.Node}
2383 @param ninfo: the node to check
2384 @param nresult: the remote results for the node
2385 @param nimg: the node image object
2386 @param vg_name: the configured VG name
2390 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2392 nimg.lvm_fail = True
2393 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2396 elif isinstance(lvdata, basestring):
2397 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2398 utils.SafeEncode(lvdata))
2399 elif not isinstance(lvdata, dict):
2400 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2402 nimg.volumes = lvdata
2403 nimg.lvm_fail = False
2405 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2406 """Verifies and updates the node instance list.
2408 If the listing was successful, then updates this node's instance
2409     list. Otherwise, it marks the RPC call as failed for the instance list key.
2412 @type ninfo: L{objects.Node}
2413 @param ninfo: the node to check
2414 @param nresult: the remote results for the node
2415 @param nimg: the node image object
2418 idata = nresult.get(constants.NV_INSTANCELIST, None)
2419 test = not isinstance(idata, list)
2420 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2421 " (instancelist): %s", utils.SafeEncode(str(idata)))
2423 nimg.hyp_fail = True
2425 nimg.instances = idata
2427 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2428 """Verifies and computes a node information map
2430 @type ninfo: L{objects.Node}
2431 @param ninfo: the node to check
2432 @param nresult: the remote results for the node
2433 @param nimg: the node image object
2434 @param vg_name: the configured VG name
2438 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2440 # try to read free memory (from the hypervisor)
2441 hv_info = nresult.get(constants.NV_HVINFO, None)
2442 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2443 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2446 nimg.mfree = int(hv_info["memory_free"])
2447 except (ValueError, TypeError):
2448 _ErrorIf(True, self.ENODERPC, node,
2449 "node returned invalid nodeinfo, check hypervisor")
2451 # FIXME: devise a free space model for file based instances as well
2452 if vg_name is not None:
2453 test = (constants.NV_VGLIST not in nresult or
2454 vg_name not in nresult[constants.NV_VGLIST])
2455 _ErrorIf(test, self.ENODELVM, node,
2456 "node didn't return data for the volume group '%s'"
2457 " - it is either missing or broken", vg_name)
2460 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2461 except (ValueError, TypeError):
2462 _ErrorIf(True, self.ENODERPC, node,
2463 "node returned invalid LVM info, check LVM status")
2465 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2466 """Gets per-disk status information for all instances.
2468 @type nodelist: list of strings
2469 @param nodelist: Node names
2470 @type node_image: dict of (name, L{objects.Node})
2471 @param node_image: Node objects
2472 @type instanceinfo: dict of (name, L{objects.Instance})
2473 @param instanceinfo: Instance objects
2474     @rtype: {instance: {node: [(success, payload)]}}
2475 @return: a dictionary of per-instance dictionaries with nodes as
2476 keys and disk information as values; the disk information is a
2477 list of tuples (success, payload)
2480 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2483 node_disks_devonly = {}
2484 diskless_instances = set()
2485 diskless = constants.DT_DISKLESS
2487 for nname in nodelist:
2488 node_instances = list(itertools.chain(node_image[nname].pinst,
2489 node_image[nname].sinst))
2490 diskless_instances.update(inst for inst in node_instances
2491 if instanceinfo[inst].disk_template == diskless)
2492 disks = [(inst, disk)
2493 for inst in node_instances
2494 for disk in instanceinfo[inst].disks]
2497 # No need to collect data
2500 node_disks[nname] = disks
2502 # Creating copies as SetDiskID below will modify the objects and that can
2503 # lead to incorrect data returned from nodes
2504 devonly = [dev.Copy() for (_, dev) in disks]
2507 self.cfg.SetDiskID(dev, nname)
2509 node_disks_devonly[nname] = devonly
2511 assert len(node_disks) == len(node_disks_devonly)
2513 # Collect data from all nodes with disks
2514 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2517 assert len(result) == len(node_disks)
2521 for (nname, nres) in result.items():
2522 disks = node_disks[nname]
2525 # No data from this node
2526 data = len(disks) * [(False, "node offline")]
2529 _ErrorIf(msg, self.ENODERPC, nname,
2530 "while getting disk information: %s", msg)
2532 # No data from this node
2533 data = len(disks) * [(False, msg)]
2536 for idx, i in enumerate(nres.payload):
2537 if isinstance(i, (tuple, list)) and len(i) == 2:
2540 logging.warning("Invalid result from node %s, entry %d: %s",
2542 data.append((False, "Invalid result from the remote node"))
2544 for ((inst, _), status) in zip(disks, data):
2545 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2547 # Add empty entries for diskless instances.
2548 for inst in diskless_instances:
2549 assert inst not in instdisk
2552 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2553 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2554 compat.all(isinstance(s, (tuple, list)) and
2555 len(s) == 2 for s in statuses)
2556 for inst, nnames in instdisk.items()
2557 for nname, statuses in nnames.items())
2558 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
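  # Illustrative note (not part of this LU): with hypothetical names, the
  # structure built above looks like
  #
  #   instdisk = {
  #     "instance1": {"node1": [(True, status0), (True, status1)]},
  #     "instance2": {"node2": [(False, "node offline")]},
  #     "diskless-instance": {},
  #   }
  #
  # i.e. instance name -> node name -> one (success, payload) pair per disk.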
2563 def _SshNodeSelector(group_uuid, all_nodes):
2564 """Create endless iterators for all potential SSH check hosts.
2567 nodes = [node for node in all_nodes
2568 if (node.group != group_uuid and
2570 keyfunc = operator.attrgetter("group")
2572 return map(itertools.cycle,
2573 [sorted(map(operator.attrgetter("name"), names))
2574 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2578 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2579 """Choose which nodes should talk to which other nodes.
2581     We will make nodes contact all nodes in their group, and one node from every other node group.
2584 @warning: This algorithm has a known issue if one node group is much
2585 smaller than others (e.g. just one node). In such a case all other
2586 nodes will talk to the single node.
2589 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2590 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2592 return (online_nodes,
2593 dict((name, sorted([i.next() for i in sel]))
2594 for name in online_nodes))
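  # Illustrative example (not part of this LU), with hypothetical node names:
  # each online node of this group is paired with one node from every other
  # group by drawing from the cycling per-group iterators, so the cross-group
  # SSH checks are spread evenly.
  #
  #   sel = [itertools.cycle(["g2-n1", "g2-n2"]), itertools.cycle(["g3-n1"])]
  #   targets = dict((name, sorted([i.next() for i in sel]))
  #                  for name in ["n1", "n2", "n3"])
  #   # n1 -> ["g2-n1", "g3-n1"], n2 -> ["g2-n2", "g3-n1"],
  #   # n3 -> ["g2-n1", "g3-n1"]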
2596 def BuildHooksEnv(self):
2599     Cluster-Verify hooks are run only in the post phase; if they fail, their
2600     output is logged in the verify output and the verification fails.
2604 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2607 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2608 for node in self.my_node_info.values())
2612 def BuildHooksNodes(self):
2613 """Build hooks nodes.
2616 return ([], self.my_node_names)
2618 def Exec(self, feedback_fn):
2619 """Verify integrity of the node group, performing various test on nodes.
2622 # This method has too many local variables. pylint: disable=R0914
2623 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2625 if not self.my_node_names:
2627 feedback_fn("* Empty node group, skipping verification")
2631 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2632 verbose = self.op.verbose
2633 self._feedback_fn = feedback_fn
2635 vg_name = self.cfg.GetVGName()
2636 drbd_helper = self.cfg.GetDRBDHelper()
2637 cluster = self.cfg.GetClusterInfo()
2638 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2639 hypervisors = cluster.enabled_hypervisors
2640 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2642 i_non_redundant = [] # Non redundant instances
2643 i_non_a_balanced = [] # Non auto-balanced instances
2644 n_offline = 0 # Count of offline nodes
2645 n_drained = 0 # Count of nodes being drained
2646 node_vol_should = {}
2648 # FIXME: verify OS list
2651 filemap = _ComputeAncillaryFiles(cluster, False)
2653 # do local checksums
2654 master_node = self.master_node = self.cfg.GetMasterNode()
2655 master_ip = self.cfg.GetMasterIP()
2657 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2659 node_verify_param = {
2660 constants.NV_FILELIST:
2661 utils.UniqueSequence(filename
2662 for files in filemap
2663 for filename in files),
2664 constants.NV_NODELIST:
2665 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2666 self.all_node_info.values()),
2667 constants.NV_HYPERVISOR: hypervisors,
2668 constants.NV_HVPARAMS:
2669 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2670 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2671 for node in node_data_list
2672 if not node.offline],
2673 constants.NV_INSTANCELIST: hypervisors,
2674 constants.NV_VERSION: None,
2675 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2676 constants.NV_NODESETUP: None,
2677 constants.NV_TIME: None,
2678 constants.NV_MASTERIP: (master_node, master_ip),
2679 constants.NV_OSLIST: None,
2680 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2683 if vg_name is not None:
2684 node_verify_param[constants.NV_VGLIST] = None
2685 node_verify_param[constants.NV_LVLIST] = vg_name
2686 node_verify_param[constants.NV_PVLIST] = [vg_name]
2687 node_verify_param[constants.NV_DRBDLIST] = None
2690 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2693 # FIXME: this needs to be changed per node-group, not cluster-wide
2695 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2696 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2697 bridges.add(default_nicpp[constants.NIC_LINK])
2698 for instance in self.my_inst_info.values():
2699 for nic in instance.nics:
2700 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2701 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2702 bridges.add(full_nic[constants.NIC_LINK])
2705 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2707 # Build our expected cluster state
2708 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2710 vm_capable=node.vm_capable))
2711 for node in node_data_list)
2715 for node in self.all_node_info.values():
2716 path = _SupportsOob(self.cfg, node)
2717 if path and path not in oob_paths:
2718 oob_paths.append(path)
2721 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2723 for instance in self.my_inst_names:
2724 inst_config = self.my_inst_info[instance]
2726 for nname in inst_config.all_nodes:
2727 if nname not in node_image:
2728 gnode = self.NodeImage(name=nname)
2729 gnode.ghost = (nname not in self.all_node_info)
2730 node_image[nname] = gnode
2732 inst_config.MapLVsByNode(node_vol_should)
2734 pnode = inst_config.primary_node
2735 node_image[pnode].pinst.append(instance)
2737 for snode in inst_config.secondary_nodes:
2738 nimg = node_image[snode]
2739 nimg.sinst.append(instance)
2740 if pnode not in nimg.sbp:
2741 nimg.sbp[pnode] = []
2742 nimg.sbp[pnode].append(instance)
2744 # At this point, we have the in-memory data structures complete,
2745 # except for the runtime information, which we'll gather next
2747 # Due to the way our RPC system works, exact response times cannot be
2748 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2749     # time before and after executing the request, we can at least have a time window.
2751 nvinfo_starttime = time.time()
2752 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2754 self.cfg.GetClusterName())
2755 nvinfo_endtime = time.time()
2757 if self.extra_lv_nodes and vg_name is not None:
2759 self.rpc.call_node_verify(self.extra_lv_nodes,
2760 {constants.NV_LVLIST: vg_name},
2761 self.cfg.GetClusterName())
2763 extra_lv_nvinfo = {}
2765 all_drbd_map = self.cfg.ComputeDRBDMap()
2767 feedback_fn("* Gathering disk information (%s nodes)" %
2768 len(self.my_node_names))
2769 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2772 feedback_fn("* Verifying configuration file consistency")
2774 # If not all nodes are being checked, we need to make sure the master node
2775 # and a non-checked vm_capable node are in the list.
2776 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2778 vf_nvinfo = all_nvinfo.copy()
2779 vf_node_info = list(self.my_node_info.values())
2780 additional_nodes = []
2781 if master_node not in self.my_node_info:
2782 additional_nodes.append(master_node)
2783 vf_node_info.append(self.all_node_info[master_node])
2784 # Add the first vm_capable node we find which is not included
2785 for node in absent_nodes:
2786 nodeinfo = self.all_node_info[node]
2787 if nodeinfo.vm_capable and not nodeinfo.offline:
2788 additional_nodes.append(node)
2789 vf_node_info.append(self.all_node_info[node])
2791 key = constants.NV_FILELIST
2792 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2793 {key: node_verify_param[key]},
2794 self.cfg.GetClusterName()))
2796 vf_nvinfo = all_nvinfo
2797 vf_node_info = self.my_node_info.values()
2799 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2801 feedback_fn("* Verifying node status")
2805 for node_i in node_data_list:
2807 nimg = node_image[node]
2811 feedback_fn("* Skipping offline node %s" % (node,))
2815 if node == master_node:
2817 elif node_i.master_candidate:
2818 ntype = "master candidate"
2819 elif node_i.drained:
2825 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2827 msg = all_nvinfo[node].fail_msg
2828 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2830 nimg.rpc_fail = True
2833 nresult = all_nvinfo[node].payload
2835 nimg.call_ok = self._VerifyNode(node_i, nresult)
2836 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2837 self._VerifyNodeNetwork(node_i, nresult)
2838 self._VerifyOob(node_i, nresult)
2841 self._VerifyNodeLVM(node_i, nresult, vg_name)
2842 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2845 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2846 self._UpdateNodeInstances(node_i, nresult, nimg)
2847 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2848 self._UpdateNodeOS(node_i, nresult, nimg)
2850 if not nimg.os_fail:
2851 if refos_img is None:
2853 self._VerifyNodeOS(node_i, nimg, refos_img)
2854 self._VerifyNodeBridges(node_i, nresult, bridges)
2856       # Check whether all running instances are primary for the node. (This
2857 # can no longer be done from _VerifyInstance below, since some of the
2858 # wrong instances could be from other node groups.)
2859 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2861 for inst in non_primary_inst:
2862 test = inst in self.all_inst_info
2863 _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2864 "instance should not run on node %s", node_i.name)
2865 _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2866 "node is running unknown instance %s", inst)
2868 for node, result in extra_lv_nvinfo.items():
2869 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2870 node_image[node], vg_name)
2872 feedback_fn("* Verifying instance status")
2873 for instance in self.my_inst_names:
2875 feedback_fn("* Verifying instance %s" % instance)
2876 inst_config = self.my_inst_info[instance]
2877 self._VerifyInstance(instance, inst_config, node_image,
2879 inst_nodes_offline = []
2881 pnode = inst_config.primary_node
2882 pnode_img = node_image[pnode]
2883 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2884 self.ENODERPC, pnode, "instance %s, connection to"
2885 " primary node failed", instance)
2887 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2888 self.EINSTANCEBADNODE, instance,
2889 "instance is marked as running and lives on offline node %s",
2890 inst_config.primary_node)
2892 # If the instance is non-redundant we cannot survive losing its primary
2893 # node, so we are not N+1 compliant. On the other hand we have no disk
2894       # templates with more than one secondary so that situation is not well handled.
2896 # FIXME: does not support file-backed instances
2897 if not inst_config.secondary_nodes:
2898 i_non_redundant.append(instance)
2900 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2901 instance, "instance has multiple secondary nodes: %s",
2902 utils.CommaJoin(inst_config.secondary_nodes),
2903 code=self.ETYPE_WARNING)
2905 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2906 pnode = inst_config.primary_node
2907 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2908 instance_groups = {}
2910 for node in instance_nodes:
2911 instance_groups.setdefault(self.all_node_info[node].group,
2915 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2916 # Sort so that we always list the primary node first.
2917 for group, nodes in sorted(instance_groups.items(),
2918 key=lambda (_, nodes): pnode in nodes,
2921 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2922 instance, "instance has primary and secondary nodes in"
2923 " different groups: %s", utils.CommaJoin(pretty_list),
2924 code=self.ETYPE_WARNING)
2926 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2927 i_non_a_balanced.append(instance)
2929 for snode in inst_config.secondary_nodes:
2930 s_img = node_image[snode]
2931 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2932 "instance %s, connection to secondary node failed", instance)
2935 inst_nodes_offline.append(snode)
2937 # warn that the instance lives on offline nodes
2938 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2939 "instance has offline secondary node(s) %s",
2940 utils.CommaJoin(inst_nodes_offline))
2941 # ... or ghost/non-vm_capable nodes
2942 for node in inst_config.all_nodes:
2943 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2944 "instance lives on ghost node %s", node)
2945 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2946 instance, "instance lives on non-vm_capable node %s", node)
2948 feedback_fn("* Verifying orphan volumes")
2949 reserved = utils.FieldSet(*cluster.reserved_lvs)
2951 # We will get spurious "unknown volume" warnings if any node of this group
2952 # is secondary for an instance whose primary is in another group. To avoid
2953 # them, we find these instances and add their volumes to node_vol_should.
2954 for inst in self.all_inst_info.values():
2955 for secondary in inst.secondary_nodes:
2956 if (secondary in self.my_node_info
2957 and inst.name not in self.my_inst_info):
2958 inst.MapLVsByNode(node_vol_should)
2961 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2963 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2964 feedback_fn("* Verifying N+1 Memory redundancy")
2965 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2967 feedback_fn("* Other Notes")
2969 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2970 % len(i_non_redundant))
2972 if i_non_a_balanced:
2973 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2974 % len(i_non_a_balanced))
2977 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2980 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2984 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2985 """Analyze the post-hooks' result
2987 This method analyses the hook result, handles it, and sends some
2988 nicely-formatted feedback back to the user.
2990 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2991 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2992 @param hooks_results: the results of the multi-node hooks rpc call
2993     @param feedback_fn: function used to send feedback back to the caller
2994 @param lu_result: previous Exec result
2995 @return: the new Exec result, based on the previous result
2999 # We only really run POST phase hooks, only for non-empty groups,
3000 # and are only interested in their results
3001 if not self.my_node_names:
3004 elif phase == constants.HOOKS_PHASE_POST:
3005 # Used to change hooks' output to proper indentation
3006 feedback_fn("* Hooks Results")
3007 assert hooks_results, "invalid result from hooks"
3009 for node_name in hooks_results:
3010 res = hooks_results[node_name]
3012 test = msg and not res.offline
3013 self._ErrorIf(test, self.ENODEHOOKS, node_name,
3014 "Communication failure in hooks execution: %s", msg)
3015 if res.offline or msg:
3016 # No need to investigate payload if node is offline or gave
3019 for script, hkr, output in res.payload:
3020 test = hkr == constants.HKR_FAIL
3021 self._ErrorIf(test, self.ENODEHOOKS, node_name,
3022 "Script %s failed, output:", script)
3024 output = self._HOOKS_INDENT_RE.sub(" ", output)
3025 feedback_fn("%s" % output)
3031 class LUClusterVerifyDisks(NoHooksLU):
3032 """Verifies the cluster disks status.
3037 def ExpandNames(self):
3038 self.share_locks = _ShareAll()
3039 self.needed_locks = {
3040 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3043 def Exec(self, feedback_fn):
3044 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3046 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3047 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3048 for group in group_names])
3051 class LUGroupVerifyDisks(NoHooksLU):
3052 """Verifies the status of all disks in a node group.
3057 def ExpandNames(self):
3058 # Raises errors.OpPrereqError on its own if group can't be found
3059 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3061 self.share_locks = _ShareAll()
3062 self.needed_locks = {
3063 locking.LEVEL_INSTANCE: [],
3064 locking.LEVEL_NODEGROUP: [],
3065 locking.LEVEL_NODE: [],
3068 def DeclareLocks(self, level):
3069 if level == locking.LEVEL_INSTANCE:
3070 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3072 # Lock instances optimistically, needs verification once node and group
3073 # locks have been acquired
3074 self.needed_locks[locking.LEVEL_INSTANCE] = \
3075 self.cfg.GetNodeGroupInstances(self.group_uuid)
3077 elif level == locking.LEVEL_NODEGROUP:
3078 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3080 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3081 set([self.group_uuid] +
3082 # Lock all groups used by instances optimistically; this requires
3083 # going via the node before it's locked, requiring verification
3086 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3087 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3089 elif level == locking.LEVEL_NODE:
3090       # This will only lock the nodes in the group to be verified which contain actual instances
3092 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3093 self._LockInstancesNodes()
3095 # Lock all nodes in group to be verified
3096 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3097 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3098 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3100 def CheckPrereq(self):
3101 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3102 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3103 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3105 assert self.group_uuid in owned_groups
3107 # Check if locked instances are still correct
3108 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3110 # Get instance information
3111 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3113 # Check if node groups for locked instances are still correct
3114 for (instance_name, inst) in self.instances.items():
3115 assert owned_nodes.issuperset(inst.all_nodes), \
3116 "Instance %s's nodes changed while we kept the lock" % instance_name
3118 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3121 assert self.group_uuid in inst_groups, \
3122 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3124 def Exec(self, feedback_fn):
3125 """Verify integrity of cluster disks.
3127 @rtype: tuple of three items
3128 @return: a tuple of (dict of node-to-node_error, list of instances
3129 which need activate-disks, dict of instance: (node, volume) for
3134 res_instances = set()
3137 nv_dict = _MapInstanceDisksToNodes([inst
3138 for inst in self.instances.values()
3142 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3143 set(self.cfg.GetVmCapableNodeList()))
3145 node_lvs = self.rpc.call_lv_list(nodes, [])
3147 for (node, node_res) in node_lvs.items():
3148 if node_res.offline:
3151 msg = node_res.fail_msg
3153 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3154 res_nodes[node] = msg
3157 for lv_name, (_, _, lv_online) in node_res.payload.items():
3158 inst = nv_dict.pop((node, lv_name), None)
3159 if not (lv_online or inst is None):
3160 res_instances.add(inst)
3162 # any leftover items in nv_dict are missing LVs, let's arrange the data
3164 for key, inst in nv_dict.iteritems():
3165 res_missing.setdefault(inst, []).append(list(key))
3167 return (res_nodes, list(res_instances), res_missing)
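# Illustrative note (not part of this LU): the tuple returned by Exec above
# could be consumed as follows (hypothetical variable names):
#
#   (node_errors, offline_disk_instances, missing_lvs) = result
#   for node, err in node_errors.items():
#     print "node %s could not be queried: %s" % (node, err)
#   for inst in offline_disk_instances:
#     print "instance %s needs activate-disks" % inst
#   for inst, vols in missing_lvs.items():
#     print "instance %s is missing LVs: %s" % (inst, vols)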
3170 class LUClusterRepairDiskSizes(NoHooksLU):
3171 """Verifies the cluster disks sizes.
3176 def ExpandNames(self):
3177 if self.op.instances:
3178 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3179 self.needed_locks = {
3180 locking.LEVEL_NODE: [],
3181 locking.LEVEL_INSTANCE: self.wanted_names,
3183 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3185 self.wanted_names = None
3186 self.needed_locks = {
3187 locking.LEVEL_NODE: locking.ALL_SET,
3188 locking.LEVEL_INSTANCE: locking.ALL_SET,
3190 self.share_locks = {
3191 locking.LEVEL_NODE: 1,
3192 locking.LEVEL_INSTANCE: 0,
3195 def DeclareLocks(self, level):
3196 if level == locking.LEVEL_NODE and self.wanted_names is not None:
3197 self._LockInstancesNodes(primary_only=True)
3199 def CheckPrereq(self):
3200 """Check prerequisites.
3202 This only checks the optional instance list against the existing names.
3205 if self.wanted_names is None:
3206 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3208 self.wanted_instances = \
3209 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3211 def _EnsureChildSizes(self, disk):
3212 """Ensure children of the disk have the needed disk size.
3214 This is valid mainly for DRBD8 and fixes an issue where the
3215 children have smaller disk size.
3217 @param disk: an L{ganeti.objects.Disk} object
3220 if disk.dev_type == constants.LD_DRBD8:
3221 assert disk.children, "Empty children for DRBD8?"
3222 fchild = disk.children[0]
3223 mismatch = fchild.size < disk.size
3225 self.LogInfo("Child disk has size %d, parent %d, fixing",
3226 fchild.size, disk.size)
3227 fchild.size = disk.size
3229 # and we recurse on this child only, not on the metadev
3230 return self._EnsureChildSizes(fchild) or mismatch
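  # Illustrative example (not part of this LU), with hypothetical sizes: for a
  # DRBD8 disk of size 1024 whose data child reports size 1000, the call above
  # sets the child's size to 1024 in place and returns True, so the caller
  # knows the configuration needs to be written out.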
3234 def Exec(self, feedback_fn):
3235 """Verify the size of cluster disks.
3238 # TODO: check child disks too
3239 # TODO: check differences in size between primary/secondary nodes
3241 for instance in self.wanted_instances:
3242 pnode = instance.primary_node
3243 if pnode not in per_node_disks:
3244 per_node_disks[pnode] = []
3245 for idx, disk in enumerate(instance.disks):
3246 per_node_disks[pnode].append((instance, idx, disk))
3249 for node, dskl in per_node_disks.items():
3250 newl = [v[2].Copy() for v in dskl]
3252 self.cfg.SetDiskID(dsk, node)
3253 result = self.rpc.call_blockdev_getsize(node, newl)
3255 self.LogWarning("Failure in blockdev_getsize call to node"
3256 " %s, ignoring", node)
3258 if len(result.payload) != len(dskl):
3259 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3260 " result.payload=%s", node, len(dskl), result.payload)
3261 self.LogWarning("Invalid result from node %s, ignoring node results",
3264 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3266 self.LogWarning("Disk %d of instance %s did not return size"
3267 " information, ignoring", idx, instance.name)
3269 if not isinstance(size, (int, long)):
3270 self.LogWarning("Disk %d of instance %s did not return valid"
3271 " size information, ignoring", idx, instance.name)
3274 if size != disk.size:
3275 self.LogInfo("Disk %d of instance %s has mismatched size,"
3276 " correcting: recorded %d, actual %d", idx,
3277 instance.name, disk.size, size)
3279 self.cfg.Update(instance, feedback_fn)
3280 changed.append((instance.name, idx, size))
3281 if self._EnsureChildSizes(disk):
3282 self.cfg.Update(instance, feedback_fn)
3283 changed.append((instance.name, idx, disk.size))
3287 class LUClusterRename(LogicalUnit):
3288 """Rename the cluster.
3291 HPATH = "cluster-rename"
3292 HTYPE = constants.HTYPE_CLUSTER
3294 def BuildHooksEnv(self):
3299 "OP_TARGET": self.cfg.GetClusterName(),
3300 "NEW_NAME": self.op.name,
3303 def BuildHooksNodes(self):
3304 """Build hooks nodes.
3307 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3309 def CheckPrereq(self):
3310 """Verify that the passed name is a valid one.
3313 hostname = netutils.GetHostname(name=self.op.name,
3314 family=self.cfg.GetPrimaryIPFamily())
3316 new_name = hostname.name
3317 self.ip = new_ip = hostname.ip
3318 old_name = self.cfg.GetClusterName()
3319 old_ip = self.cfg.GetMasterIP()
3320 if new_name == old_name and new_ip == old_ip:
3321 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3322 " cluster has changed",
3324 if new_ip != old_ip:
3325 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3326 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3327 " reachable on the network" %
3328 new_ip, errors.ECODE_NOTUNIQUE)
3330 self.op.name = new_name
3332 def Exec(self, feedback_fn):
3333 """Rename the cluster.
3336 clustername = self.op.name
3339 # shutdown the master IP
3340 master = self.cfg.GetMasterNode()
3341 result = self.rpc.call_node_deactivate_master_ip(master)
3342 result.Raise("Could not disable the master role")
3345 cluster = self.cfg.GetClusterInfo()
3346 cluster.cluster_name = clustername
3347 cluster.master_ip = ip
3348 self.cfg.Update(cluster, feedback_fn)
3350 # update the known hosts file
3351 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3352 node_list = self.cfg.GetOnlineNodeList()
3354 node_list.remove(master)
3357 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3359 result = self.rpc.call_node_activate_master_ip(master)
3360 msg = result.fail_msg
3362 self.LogWarning("Could not re-enable the master role on"
3363 " the master, please restart manually: %s", msg)
3368 class LUClusterSetParams(LogicalUnit):
3369 """Change the parameters of the cluster.
3372 HPATH = "cluster-modify"
3373 HTYPE = constants.HTYPE_CLUSTER
3376 def CheckArguments(self):
3380 if self.op.uid_pool:
3381 uidpool.CheckUidPool(self.op.uid_pool)
3383 if self.op.add_uids:
3384 uidpool.CheckUidPool(self.op.add_uids)
3386 if self.op.remove_uids:
3387 uidpool.CheckUidPool(self.op.remove_uids)
3389 def ExpandNames(self):
3390 # FIXME: in the future maybe other cluster params won't require checking on
3391 # all nodes to be modified.
3392 self.needed_locks = {
3393 locking.LEVEL_NODE: locking.ALL_SET,
3395 self.share_locks[locking.LEVEL_NODE] = 1
3397 def BuildHooksEnv(self):
3402 "OP_TARGET": self.cfg.GetClusterName(),
3403 "NEW_VG_NAME": self.op.vg_name,
3406 def BuildHooksNodes(self):
3407 """Build hooks nodes.
3410 mn = self.cfg.GetMasterNode()
3413 def CheckPrereq(self):
3414 """Check prerequisites.
3416     This checks that the given parameters don't conflict with each other and
3417     that the given volume group is valid.
3420 if self.op.vg_name is not None and not self.op.vg_name:
3421 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3422 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3423 " instances exist", errors.ECODE_INVAL)
3425 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3426 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3427 raise errors.OpPrereqError("Cannot disable drbd helper while"
3428 " drbd-based instances exist",
3431 node_list = self.owned_locks(locking.LEVEL_NODE)
3433 # if vg_name not None, checks given volume group on all nodes
3435 vglist = self.rpc.call_vg_list(node_list)
3436 for node in node_list:
3437 msg = vglist[node].fail_msg
3439 # ignoring down node
3440 self.LogWarning("Error while gathering data on node %s"
3441 " (ignoring node): %s", node, msg)
3443 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3445 constants.MIN_VG_SIZE)
3447 raise errors.OpPrereqError("Error on node '%s': %s" %
3448 (node, vgstatus), errors.ECODE_ENVIRON)
3450 if self.op.drbd_helper:
3451 # checks given drbd helper on all nodes
3452 helpers = self.rpc.call_drbd_helper(node_list)
3453 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3455 self.LogInfo("Not checking drbd helper on offline node %s", node)
3457 msg = helpers[node].fail_msg
3459 raise errors.OpPrereqError("Error checking drbd helper on node"
3460 " '%s': %s" % (node, msg),
3461 errors.ECODE_ENVIRON)
3462 node_helper = helpers[node].payload
3463 if node_helper != self.op.drbd_helper:
3464 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3465 (node, node_helper), errors.ECODE_ENVIRON)
3467 self.cluster = cluster = self.cfg.GetClusterInfo()
3468 # validate params changes
3469 if self.op.beparams:
3470 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3471 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3473 if self.op.ndparams:
3474 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3475 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3477 # TODO: we need a more general way to handle resetting
3478 # cluster-level parameters to default values
3479 if self.new_ndparams["oob_program"] == "":
3480 self.new_ndparams["oob_program"] = \
3481 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3483 if self.op.nicparams:
3484 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3485 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3486 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3489 # check all instances for consistency
3490 for instance in self.cfg.GetAllInstancesInfo().values():
3491 for nic_idx, nic in enumerate(instance.nics):
3492 params_copy = copy.deepcopy(nic.nicparams)
3493 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3495 # check parameter syntax
3497 objects.NIC.CheckParameterSyntax(params_filled)
3498 except errors.ConfigurationError, err:
3499 nic_errors.append("Instance %s, nic/%d: %s" %
3500 (instance.name, nic_idx, err))
3502 # if we're moving instances to routed, check that they have an ip
3503 target_mode = params_filled[constants.NIC_MODE]
3504 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3505 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3506 " address" % (instance.name, nic_idx))
3508 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3509 "\n".join(nic_errors))
3511 # hypervisor list/parameters
3512 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3513 if self.op.hvparams:
3514 for hv_name, hv_dict in self.op.hvparams.items():
3515 if hv_name not in self.new_hvparams:
3516 self.new_hvparams[hv_name] = hv_dict
3518 self.new_hvparams[hv_name].update(hv_dict)
3520 # os hypervisor parameters
3521 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3523 for os_name, hvs in self.op.os_hvp.items():
3524 if os_name not in self.new_os_hvp:
3525 self.new_os_hvp[os_name] = hvs
3527 for hv_name, hv_dict in hvs.items():
3528 if hv_name not in self.new_os_hvp[os_name]:
3529 self.new_os_hvp[os_name][hv_name] = hv_dict
3531 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3534 self.new_osp = objects.FillDict(cluster.osparams, {})
3535 if self.op.osparams:
3536 for os_name, osp in self.op.osparams.items():
3537 if os_name not in self.new_osp:
3538 self.new_osp[os_name] = {}
3540 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3543 if not self.new_osp[os_name]:
3544 # we removed all parameters
3545 del self.new_osp[os_name]
3547 # check the parameter validity (remote check)
3548 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3549 os_name, self.new_osp[os_name])
3551 # changes to the hypervisor list
3552 if self.op.enabled_hypervisors is not None:
3553 self.hv_list = self.op.enabled_hypervisors
3554 for hv in self.hv_list:
3555 # if the hypervisor doesn't already exist in the cluster
3556 # hvparams, we initialize it to empty, and then (in both
3557 # cases) we make sure to fill the defaults, as we might not
3558         # have a complete defaults list if the hypervisor wasn't enabled before.
3560 if hv not in new_hvp:
3562 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3563 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3565 self.hv_list = cluster.enabled_hypervisors
3567 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3568 # either the enabled list has changed, or the parameters have, validate
3569 for hv_name, hv_params in self.new_hvparams.items():
3570 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3571 (self.op.enabled_hypervisors and
3572 hv_name in self.op.enabled_hypervisors)):
3573 # either this is a new hypervisor, or its parameters have changed
3574 hv_class = hypervisor.GetHypervisor(hv_name)
3575 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3576 hv_class.CheckParameterSyntax(hv_params)
3577 _CheckHVParams(self, node_list, hv_name, hv_params)
3580 # no need to check any newly-enabled hypervisors, since the
3581 # defaults have already been checked in the above code-block
3582 for os_name, os_hvp in self.new_os_hvp.items():
3583 for hv_name, hv_params in os_hvp.items():
3584 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3585 # we need to fill in the new os_hvp on top of the actual hv_p
3586 cluster_defaults = self.new_hvparams.get(hv_name, {})
3587 new_osp = objects.FillDict(cluster_defaults, hv_params)
3588 hv_class = hypervisor.GetHypervisor(hv_name)
3589 hv_class.CheckParameterSyntax(new_osp)
3590 _CheckHVParams(self, node_list, hv_name, new_osp)
3592 if self.op.default_iallocator:
3593 alloc_script = utils.FindFile(self.op.default_iallocator,
3594 constants.IALLOCATOR_SEARCH_PATH,
3596 if alloc_script is None:
3597 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3598 " specified" % self.op.default_iallocator,
3601 def Exec(self, feedback_fn):
3602 """Change the parameters of the cluster.
3605 if self.op.vg_name is not None:
3606 new_volume = self.op.vg_name
3609 if new_volume != self.cfg.GetVGName():
3610 self.cfg.SetVGName(new_volume)
3612 feedback_fn("Cluster LVM configuration already in desired"
3613 " state, not changing")
3614 if self.op.drbd_helper is not None:
3615 new_helper = self.op.drbd_helper
3618 if new_helper != self.cfg.GetDRBDHelper():
3619 self.cfg.SetDRBDHelper(new_helper)
3621 feedback_fn("Cluster DRBD helper already in desired state,"
3623 if self.op.hvparams:
3624 self.cluster.hvparams = self.new_hvparams
3626 self.cluster.os_hvp = self.new_os_hvp
3627 if self.op.enabled_hypervisors is not None:
3628 self.cluster.hvparams = self.new_hvparams
3629 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3630 if self.op.beparams:
3631 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3632 if self.op.nicparams:
3633 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3634 if self.op.osparams:
3635 self.cluster.osparams = self.new_osp
3636 if self.op.ndparams:
3637 self.cluster.ndparams = self.new_ndparams
3639 if self.op.candidate_pool_size is not None:
3640 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3641 # we need to update the pool size here, otherwise the save will fail
3642 _AdjustCandidatePool(self, [])
3644 if self.op.maintain_node_health is not None:
3645 self.cluster.maintain_node_health = self.op.maintain_node_health
3647 if self.op.prealloc_wipe_disks is not None:
3648 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3650 if self.op.add_uids is not None:
3651 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3653 if self.op.remove_uids is not None:
3654 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3656 if self.op.uid_pool is not None:
3657 self.cluster.uid_pool = self.op.uid_pool
3659 if self.op.default_iallocator is not None:
3660 self.cluster.default_iallocator = self.op.default_iallocator
3662 if self.op.reserved_lvs is not None:
3663 self.cluster.reserved_lvs = self.op.reserved_lvs
3665 def helper_os(aname, mods, desc):
3667 lst = getattr(self.cluster, aname)
3668 for key, val in mods:
3669 if key == constants.DDM_ADD:
3671 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3674 elif key == constants.DDM_REMOVE:
3678 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3680 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3682 if self.op.hidden_os:
3683 helper_os("hidden_os", self.op.hidden_os, "hidden")
3685 if self.op.blacklisted_os:
3686 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3688 if self.op.master_netdev:
3689 master = self.cfg.GetMasterNode()
3690 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3691 self.cluster.master_netdev)
3692 result = self.rpc.call_node_deactivate_master_ip(master)
3693 result.Raise("Could not disable the master ip")
3694 feedback_fn("Changing master_netdev from %s to %s" %
3695 (self.cluster.master_netdev, self.op.master_netdev))
3696 self.cluster.master_netdev = self.op.master_netdev
3698 self.cfg.Update(self.cluster, feedback_fn)
3700 if self.op.master_netdev:
3701 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3702 self.op.master_netdev)
3703 result = self.rpc.call_node_activate_master_ip(master)
3705 self.LogWarning("Could not re-enable the master ip on"
3706 " the master, please restart manually: %s",
3710 def _UploadHelper(lu, nodes, fname):
3711 """Helper for uploading a file and showing warnings.
3714 if os.path.exists(fname):
3715 result = lu.rpc.call_upload_file(nodes, fname)
3716 for to_node, to_result in result.items():
3717 msg = to_result.fail_msg
3719 msg = ("Copy of file %s to node %s failed: %s" %
3720 (fname, to_node, msg))
3721 lu.proc.LogWarning(msg)
3724 def _ComputeAncillaryFiles(cluster, redist):
3725 """Compute files external to Ganeti which need to be consistent.
3727 @type redist: boolean
3728 @param redist: Whether to include files which need to be redistributed
3731 # Compute files for all nodes
3732 files_all = set([
3733 constants.SSH_KNOWN_HOSTS_FILE,
3734 constants.CONFD_HMAC_KEY,
3735 constants.CLUSTER_DOMAIN_SECRET_FILE,
3736 constants.RAPI_USERS_FILE,
3737 ])
3739 if not redist:
3740 files_all.update(constants.ALL_CERT_FILES)
3741 files_all.update(ssconf.SimpleStore().GetFileList())
3742 else:
3743 # we need to ship at least the RAPI certificate
3744 files_all.add(constants.RAPI_CERT_FILE)
3746 if cluster.modify_etc_hosts:
3747 files_all.add(constants.ETC_HOSTS)
3749 # Files which are optional, these must:
3750 # - be present in one other category as well
3751 # - either exist or not exist on all nodes of that category (mc, vm all)
3752 files_opt = set([
3753 constants.RAPI_USERS_FILE,
3754 ])
3756 # Files which should only be on master candidates
3757 files_mc = set()
3758 if not redist:
3759 files_mc.add(constants.CLUSTER_CONF_FILE)
3761 # Files which should only be on VM-capable nodes
3762 files_vm = set(filename
3763 for hv_name in cluster.enabled_hypervisors
3764 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3766 files_opt |= set(filename
3767 for hv_name in cluster.enabled_hypervisors
3768 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3770 # Filenames in each category must be unique
3771 all_files_set = files_all | files_mc | files_vm
3772 assert (len(all_files_set) ==
3773 sum(map(len, [files_all, files_mc, files_vm]))), \
3774 "Found file listed in more than one file list"
3776 # Optional files must be present in one other category
3777 assert all_files_set.issuperset(files_opt), \
3778 "Optional file not in a different required list"
3780 return (files_all, files_opt, files_mc, files_vm)
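# Editor's sketch (not part of the original module): the three required
# categories returned above are meant to be disjoint, and every optional
# file must also belong to one of them. A caller could re-check those
# invariants with a hypothetical helper like this one:
def _ExampleCheckAncillaryCategories(files_all, files_opt, files_mc, files_vm):
  """Illustrative re-statement of the assertions in _ComputeAncillaryFiles."""
  all_files = files_all | files_mc | files_vm
  disjoint = len(all_files) == len(files_all) + len(files_mc) + len(files_vm)
  # every optional file must also appear in one of the required categories
  return disjoint and all_files.issuperset(files_opt)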
3783 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3784 """Distribute additional files which are part of the cluster configuration.
3786 ConfigWriter takes care of distributing the config and ssconf files, but
3787 there are more files which should be distributed to all nodes. This function
3788 makes sure those are copied.
3790 @param lu: calling logical unit
3791 @param additional_nodes: list of nodes not in the config to distribute to
3792 @type additional_vm: boolean
3793 @param additional_vm: whether the additional nodes are vm-capable or not
3796 # Gather target nodes
3797 cluster = lu.cfg.GetClusterInfo()
3798 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3800 online_nodes = lu.cfg.GetOnlineNodeList()
3801 vm_nodes = lu.cfg.GetVmCapableNodeList()
3803 if additional_nodes is not None:
3804 online_nodes.extend(additional_nodes)
3806 vm_nodes.extend(additional_nodes)
3808 # Never distribute to master node
3809 for nodelist in [online_nodes, vm_nodes]:
3810 if master_info.name in nodelist:
3811 nodelist.remove(master_info.name)
3814 (files_all, _, files_mc, files_vm) = \
3815 _ComputeAncillaryFiles(cluster, True)
3817 # Never re-distribute configuration file from here
3818 assert not (constants.CLUSTER_CONF_FILE in files_all or
3819 constants.CLUSTER_CONF_FILE in files_vm)
3820 assert not files_mc, "Master candidates not handled in this function"
3822 filemap = [
3823 (online_nodes, files_all),
3824 (vm_nodes, files_vm),
3825 ]
3828 for (node_list, files) in filemap:
3829 for fname in files:
3830 _UploadHelper(lu, node_list, fname)
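# Editor's note (illustrative usage): most callers simply run
#   _RedistributeAncillaryFiles(lu)
# after a configuration change, while node addition passes the new node
# explicitly, e.g.
#   _RedistributeAncillaryFiles(lu, additional_nodes=[node], additional_vm=True)
# because that node is not yet part of the configuration.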
3833 class LUClusterRedistConf(NoHooksLU):
3834 """Force the redistribution of cluster configuration.
3836 This is a very simple LU.
3841 def ExpandNames(self):
3842 self.needed_locks = {
3843 locking.LEVEL_NODE: locking.ALL_SET,
3845 self.share_locks[locking.LEVEL_NODE] = 1
3847 def Exec(self, feedback_fn):
3848 """Redistribute the configuration.
3851 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3852 _RedistributeAncillaryFiles(self)
3855 class LUClusterActivateMasterIp(NoHooksLU):
3856 """Activate the master IP on the master node.
3859 def Exec(self, feedback_fn):
3860 """Activate the master IP.
3863 master = self.cfg.GetMasterNode()
3864 result = self.rpc.call_node_activate_master_ip(master)
3865 result.Raise("Could not activate the master IP")
3868 class LUClusterDeactivateMasterIp(NoHooksLU):
3869 """Deactivate the master IP on the master node.
3872 def Exec(self, feedback_fn):
3873 """Deactivate the master IP.
3876 master = self.cfg.GetMasterNode()
3877 result = self.rpc.call_node_deactivate_master_ip(master)
3878 result.Raise("Could not deactivate the master IP")
3881 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3882 """Sleep and poll for an instance's disk to sync.
3885 if not instance.disks or disks is not None and not disks:
3888 disks = _ExpandCheckDisks(instance, disks)
3891 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3893 node = instance.primary_node
3896 lu.cfg.SetDiskID(dev, node)
3898 # TODO: Convert to utils.Retry
3901 degr_retries = 10 # in seconds, as we sleep 1 second each time
3905 cumul_degraded = False
3906 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3907 msg = rstats.fail_msg
3909 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3912 raise errors.RemoteError("Can't contact node %s for mirror data,"
3913 " aborting." % node)
3916 rstats = rstats.payload
3918 for i, mstat in enumerate(rstats):
3920 lu.LogWarning("Can't compute data for node %s/%s",
3921 node, disks[i].iv_name)
3924 cumul_degraded = (cumul_degraded or
3925 (mstat.is_degraded and mstat.sync_percent is None))
3926 if mstat.sync_percent is not None:
3928 if mstat.estimated_time is not None:
3929 rem_time = ("%s remaining (estimated)" %
3930 utils.FormatSeconds(mstat.estimated_time))
3931 max_time = mstat.estimated_time
3932 else:
3933 rem_time = "no time estimate"
3934 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3935 (disks[i].iv_name, mstat.sync_percent, rem_time))
3937 # if we're done but degraded, let's do a few small retries, to
3938 # make sure we see a stable and not transient situation; therefore
3939 # we force restart of the loop
3940 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3941 logging.info("Degraded disks found, %d retries left", degr_retries)
3949 time.sleep(min(60, max_time))
3952 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3953 return not cumul_degraded
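# Editor's sketch (standalone illustration, not used by this module): the
# polling pattern above reduced to its core - re-check a degraded status a
# limited number of times before giving up. All names below are hypothetical.
def _ExamplePollUntilClean(get_degraded, retries=10, sleep_fn=time.sleep):
  """Return True once get_degraded() reports False, within "retries" checks."""
  while retries > 0:
    if not get_degraded():
      return True
    retries -= 1
    sleep_fn(1)
  return False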
3956 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3957 """Check that mirrors are not degraded.
3959 The ldisk parameter, if True, will change the test from the
3960 is_degraded attribute (which represents overall non-ok status for
3961 the device(s)) to the ldisk (representing the local storage status).
3964 lu.cfg.SetDiskID(dev, node)
3966 result = True
3968 if on_primary or dev.AssembleOnSecondary():
3969 rstats = lu.rpc.call_blockdev_find(node, dev)
3970 msg = rstats.fail_msg
3972 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3974 elif not rstats.payload:
3975 lu.LogWarning("Can't find disk on node %s", node)
3978 if ldisk:
3979 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3980 else:
3981 result = result and not rstats.payload.is_degraded
3984 for child in dev.children:
3985 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3987 return result
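# Editor's note: callers that only care about one node's local storage (for
# example before failing over to it) pass ldisk=True, which compares
# payload.ldisk_status against constants.LDS_OKAY; with the default
# ldisk=False the overall payload.is_degraded flag is checked instead.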
3990 class LUOobCommand(NoHooksLU):
3991 """Logical unit for OOB handling.
3995 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3997 def ExpandNames(self):
3998 """Gather locks we need.
4001 if self.op.node_names:
4002 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4003 lock_names = self.op.node_names
4004 else:
4005 lock_names = locking.ALL_SET
4007 self.needed_locks = {
4008 locking.LEVEL_NODE: lock_names,
4011 def CheckPrereq(self):
4012 """Check prerequisites.
4015 - the node exists in the configuration
4018 Any errors are signaled by raising errors.OpPrereqError.
4022 self.master_node = self.cfg.GetMasterNode()
4024 assert self.op.power_delay >= 0.0
4026 if self.op.node_names:
4027 if (self.op.command in self._SKIP_MASTER and
4028 self.master_node in self.op.node_names):
4029 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4030 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4032 if master_oob_handler:
4033 additional_text = ("run '%s %s %s' if you want to operate on the"
4034 " master regardless") % (master_oob_handler,
4035                          self.op.command,
4036                          self.master_node)
4037 else:
4038 additional_text = "it does not support out-of-band operations"
4040 raise errors.OpPrereqError(("Operating on the master node %s is not"
4041 " allowed for %s; %s") %
4042 (self.master_node, self.op.command,
4043 additional_text), errors.ECODE_INVAL)
4044 else:
4045 self.op.node_names = self.cfg.GetNodeList()
4046 if self.op.command in self._SKIP_MASTER:
4047 self.op.node_names.remove(self.master_node)
4049 if self.op.command in self._SKIP_MASTER:
4050 assert self.master_node not in self.op.node_names
4052 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4053 if node is None:
4054 raise errors.OpPrereqError("Node %s not found" % node_name,
4055                            errors.ECODE_NOENT)
4056 else:
4057 self.nodes.append(node)
4059 if (not self.op.ignore_status and
4060 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4061 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4062 " not marked offline") % node_name,
4065 def Exec(self, feedback_fn):
4066 """Execute OOB and return result if we expect any.
4069 master_node = self.master_node
4072 for idx, node in enumerate(utils.NiceSort(self.nodes,
4073 key=lambda node: node.name)):
4074 node_entry = [(constants.RS_NORMAL, node.name)]
4075 ret.append(node_entry)
4077 oob_program = _SupportsOob(self.cfg, node)
4079 if not oob_program:
4080 node_entry.append((constants.RS_UNAVAIL, None))
4081 continue
4083 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4084 self.op.command, oob_program, node.name)
4085 result = self.rpc.call_run_oob(master_node, oob_program,
4086 self.op.command, node.name,
4087 self.op.timeout)
4089 if result.fail_msg:
4090 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4091 node.name, result.fail_msg)
4092 node_entry.append((constants.RS_NODATA, None))
4093 else:
4094 try:
4095 self._CheckPayload(result)
4096 except errors.OpExecError, err:
4097 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4098                 node.name, err)
4099 node_entry.append((constants.RS_NODATA, None))
4100 else:
4101 if self.op.command == constants.OOB_HEALTH:
4102 # For health we should log important events
4103 for item, status in result.payload:
4104 if status in [constants.OOB_STATUS_WARNING,
4105 constants.OOB_STATUS_CRITICAL]:
4106 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4107 item, node.name, status)
4109 if self.op.command == constants.OOB_POWER_ON:
4110 node.powered = True
4111 elif self.op.command == constants.OOB_POWER_OFF:
4112 node.powered = False
4113 elif self.op.command == constants.OOB_POWER_STATUS:
4114 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4115 if powered != node.powered:
4116 logging.warning(("Recorded power state (%s) of node '%s' does not"
4117 " match actual power state (%s)"), node.powered,
4118 node.name, powered)
4120 # For configuration changing commands we should update the node
4121 if self.op.command in (constants.OOB_POWER_ON,
4122 constants.OOB_POWER_OFF):
4123 self.cfg.Update(node, feedback_fn)
4125 node_entry.append((constants.RS_NORMAL, result.payload))
4127 if (self.op.command == constants.OOB_POWER_ON and
4128 idx < len(self.nodes) - 1):
4129 time.sleep(self.op.power_delay)
4133 def _CheckPayload(self, result):
4134 """Checks if the payload is valid.
4136 @param result: RPC result
4137 @raises errors.OpExecError: If payload is not valid
4141 if self.op.command == constants.OOB_HEALTH:
4142 if not isinstance(result.payload, list):
4143 errs.append("command 'health' is expected to return a list but got %s" %
4144 type(result.payload))
4146 for item, status in result.payload:
4147 if status not in constants.OOB_STATUSES:
4148 errs.append("health item '%s' has invalid status '%s'" %
4149             (item, status))
4151 if self.op.command == constants.OOB_POWER_STATUS:
4152 if not isinstance(result.payload, dict):
4153 errs.append("power-status is expected to return a dict but got %s" %
4154 type(result.payload))
4156 if self.op.command in [
4157 constants.OOB_POWER_ON,
4158 constants.OOB_POWER_OFF,
4159 constants.OOB_POWER_CYCLE,
4161 if result.payload is not None:
4162 errs.append("%s is expected to not return payload but got '%s'" %
4163 (self.op.command, result.payload))
4165 if errs:
4166 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4167 utils.CommaJoin(errs))
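# Editor's note (illustrative payload shapes enforced above): "health"
# returns a list of (item, status) pairs with statuses drawn from
# constants.OOB_STATUSES, "power-status" returns a dict such as
#   {constants.OOB_POWER_STATUS_POWERED: True}
# and power-on/power-off/power-cycle are expected to return no payload.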
4170 class _OsQuery(_QueryBase):
4171 FIELDS = query.OS_FIELDS
4173 def ExpandNames(self, lu):
4174 # Lock all nodes in shared mode
4175 # Temporary removal of locks, should be reverted later
4176 # TODO: reintroduce locks when they are lighter-weight
4177 lu.needed_locks = {}
4178 #self.share_locks[locking.LEVEL_NODE] = 1
4179 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4181 # The following variables interact with _QueryBase._GetNames
4182 if self.names:
4183 self.wanted = self.names
4184 else:
4185 self.wanted = locking.ALL_SET
4187 self.do_locking = self.use_locking
4189 def DeclareLocks(self, lu, level):
4193 def _DiagnoseByOS(rlist):
4194 """Remaps a per-node return list into a per-os per-node dictionary
4196 @param rlist: a map with node names as keys and OS objects as values
4199 @return: a dictionary with osnames as keys and as value another
4200 map, with nodes as keys and tuples of (path, status, diagnose,
4201 variants, parameters, api_versions) as values, eg::
4203 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4204 (/srv/..., False, "invalid api")],
4205 "node2": [(/srv/..., True, "", [], [])]}
4208 """
4209 all_os = {}
4210 # we build here the list of nodes that didn't fail the RPC (at RPC
4211 # level), so that nodes with a non-responding node daemon don't
4212 # make all OSes invalid
4213 good_nodes = [node_name for node_name in rlist
4214 if not rlist[node_name].fail_msg]
4215 for node_name, nr in rlist.items():
4216 if nr.fail_msg or not nr.payload:
4217 continue
4218 for (name, path, status, diagnose, variants,
4219 params, api_versions) in nr.payload:
4220 if name not in all_os:
4221 # build a list of nodes for this os containing empty lists
4222 # for each node in node_list
4223 all_os[name] = {}
4224 for nname in good_nodes:
4225 all_os[name][nname] = []
4226 # convert params from [name, help] to (name, help)
4227 params = [tuple(v) for v in params]
4228 all_os[name][node_name].append((path, status, diagnose,
4229 variants, params, api_versions))
4231 return all_os
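# Editor's sketch (standalone illustration of the regrouping performed above,
# using plain dictionaries instead of RPC results; all names are hypothetical):
def _ExampleGroupByOs(node_payloads):
  """Turn {node: [(os_name, data), ...]} into {os_name: {node: [data, ...]}}."""
  grouped = {}
  for node_name, entries in node_payloads.items():
    for (os_name, data) in entries:
      grouped.setdefault(os_name, {}).setdefault(node_name, []).append(data)
  return grouped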
4232 def _GetQueryData(self, lu):
4233 """Computes the list of nodes and their attributes.
4236 # Locking is not used
4237 assert not (compat.any(lu.glm.is_owned(level)
4238 for level in locking.LEVELS
4239 if level != locking.LEVEL_CLUSTER) or
4240 self.do_locking or self.use_locking)
4242 valid_nodes = [node.name
4243 for node in lu.cfg.GetAllNodesInfo().values()
4244 if not node.offline and node.vm_capable]
4245 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4246 cluster = lu.cfg.GetClusterInfo()
4250 for (os_name, os_data) in pol.items():
4251 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4252 hidden=(os_name in cluster.hidden_os),
4253 blacklisted=(os_name in cluster.blacklisted_os))
4257 api_versions = set()
4259 for idx, osl in enumerate(os_data.values()):
4260 info.valid = bool(info.valid and osl and osl[0][1])
4264 (node_variants, node_params, node_api) = osl[0][3:6]
4265 if idx == 0:
4266 # First entry
4267 variants.update(node_variants)
4268 parameters.update(node_params)
4269 api_versions.update(node_api)
4270 else:
4271 # Filter out inconsistent values
4272 variants.intersection_update(node_variants)
4273 parameters.intersection_update(node_params)
4274 api_versions.intersection_update(node_api)
4276 info.variants = list(variants)
4277 info.parameters = list(parameters)
4278 info.api_versions = list(api_versions)
4280 data[os_name] = info
4282 # Prepare data in requested order
4283 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4284         if name in data]
4287 class LUOsDiagnose(NoHooksLU):
4288 """Logical unit for OS diagnose/query.
4294 def _BuildFilter(fields, names):
4295 """Builds a filter for querying OSes.
4298 name_filter = qlang.MakeSimpleFilter("name", names)
4300 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4301 # respective field is not requested
4302 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4303 for fname in ["hidden", "blacklisted"]
4304 if fname not in fields]
4305 if "valid" not in fields:
4306 status_filter.append([qlang.OP_TRUE, "valid"])
4308 if status_filter:
4309 status_filter.insert(0, qlang.OP_AND)
4310 else:
4311 status_filter = None
4313 if name_filter and status_filter:
4314 return [qlang.OP_AND, name_filter, status_filter]
4315 elif name_filter:
4316 return name_filter
4317 else:
4318 return status_filter
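# Editor's note (illustrative): for fields=["name"] and names=["debian-8"]
# the method above would build a filter roughly of the form
#   [qlang.OP_AND, <name filter for "debian-8">,
#    [qlang.OP_AND, [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#                   [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#                   [qlang.OP_TRUE, "valid"]]]
# i.e. hidden, blacklisted and invalid OSes are excluded unless the
# corresponding field was explicitly requested.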
4320 def CheckArguments(self):
4321 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4322 self.op.output_fields, False)
4324 def ExpandNames(self):
4325 self.oq.ExpandNames(self)
4327 def Exec(self, feedback_fn):
4328 return self.oq.OldStyleQuery(self)
4331 class LUNodeRemove(LogicalUnit):
4332 """Logical unit for removing a node.
4335 HPATH = "node-remove"
4336 HTYPE = constants.HTYPE_NODE
4338 def BuildHooksEnv(self):
4341 This doesn't run on the target node in the pre phase as a failed
4342 node would then be impossible to remove.
4346 "OP_TARGET": self.op.node_name,
4347 "NODE_NAME": self.op.node_name,
4350 def BuildHooksNodes(self):
4351 """Build hooks nodes.
4354 all_nodes = self.cfg.GetNodeList()
4355 try:
4356 all_nodes.remove(self.op.node_name)
4357 except ValueError:
4358 logging.warning("Node '%s', which is about to be removed, was not found"
4359 " in the list of all nodes", self.op.node_name)
4360 return (all_nodes, all_nodes)
4362 def CheckPrereq(self):
4363 """Check prerequisites.
4366 - the node exists in the configuration
4367 - it does not have primary or secondary instances
4368 - it's not the master
4370 Any errors are signaled by raising errors.OpPrereqError.
4373 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4374 node = self.cfg.GetNodeInfo(self.op.node_name)
4375 assert node is not None
4377 masternode = self.cfg.GetMasterNode()
4378 if node.name == masternode:
4379 raise errors.OpPrereqError("Node is the master node, failover to another"
4380 " node is required", errors.ECODE_INVAL)
4382 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4383 if node.name in instance.all_nodes:
4384 raise errors.OpPrereqError("Instance %s is still running on the node,"
4385 " please remove first" % instance_name,
4387 self.op.node_name = node.name
4390 def Exec(self, feedback_fn):
4391 """Removes the node from the cluster.
4395 logging.info("Stopping the node daemon and removing configs from node %s",
4398 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4400 # Promote nodes to master candidate as needed
4401 _AdjustCandidatePool(self, exceptions=[node.name])
4402 self.context.RemoveNode(node.name)
4404 # Run post hooks on the node before it's removed
4405 _RunPostHook(self, node.name)
4407 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4408 msg = result.fail_msg
4410 self.LogWarning("Errors encountered on the remote node while leaving"
4411 " the cluster: %s", msg)
4413 # Remove node from our /etc/hosts
4414 if self.cfg.GetClusterInfo().modify_etc_hosts:
4415 master_node = self.cfg.GetMasterNode()
4416 result = self.rpc.call_etc_hosts_modify(master_node,
4417 constants.ETC_HOSTS_REMOVE,
4419 result.Raise("Can't update hosts file with new host data")
4420 _RedistributeAncillaryFiles(self)
4423 class _NodeQuery(_QueryBase):
4424 FIELDS = query.NODE_FIELDS
4426 def ExpandNames(self, lu):
4427 lu.needed_locks = {}
4428 lu.share_locks = _ShareAll()
4430 if self.names:
4431 self.wanted = _GetWantedNodes(lu, self.names)
4432 else:
4433 self.wanted = locking.ALL_SET
4435 self.do_locking = (self.use_locking and
4436 query.NQ_LIVE in self.requested_data)
4439 # If any non-static field is requested we need to lock the nodes
4440 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4442 def DeclareLocks(self, lu, level):
4445 def _GetQueryData(self, lu):
4446 """Computes the list of nodes and their attributes.
4449 all_info = lu.cfg.GetAllNodesInfo()
4451 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4453 # Gather data as requested
4454 if query.NQ_LIVE in self.requested_data:
4455 # filter out non-vm_capable nodes
4456 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4458 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4459 lu.cfg.GetHypervisorType())
4460 live_data = dict((name, nresult.payload)
4461 for (name, nresult) in node_data.items()
4462 if not nresult.fail_msg and nresult.payload)
4466 if query.NQ_INST in self.requested_data:
4467 node_to_primary = dict([(name, set()) for name in nodenames])
4468 node_to_secondary = dict([(name, set()) for name in nodenames])
4470 inst_data = lu.cfg.GetAllInstancesInfo()
4472 for inst in inst_data.values():
4473 if inst.primary_node in node_to_primary:
4474 node_to_primary[inst.primary_node].add(inst.name)
4475 for secnode in inst.secondary_nodes:
4476 if secnode in node_to_secondary:
4477 node_to_secondary[secnode].add(inst.name)
4478 else:
4479 node_to_primary = None
4480 node_to_secondary = None
4482 if query.NQ_OOB in self.requested_data:
4483 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4484 for name, node in all_info.iteritems())
4488 if query.NQ_GROUP in self.requested_data:
4489 groups = lu.cfg.GetAllNodeGroupsInfo()
4493 return query.NodeQueryData([all_info[name] for name in nodenames],
4494 live_data, lu.cfg.GetMasterNode(),
4495 node_to_primary, node_to_secondary, groups,
4496 oob_support, lu.cfg.GetClusterInfo())
4499 class LUNodeQuery(NoHooksLU):
4500 """Logical unit for querying nodes.
4503 # pylint: disable=W0142
4506 def CheckArguments(self):
4507 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4508 self.op.output_fields, self.op.use_locking)
4510 def ExpandNames(self):
4511 self.nq.ExpandNames(self)
4513 def Exec(self, feedback_fn):
4514 return self.nq.OldStyleQuery(self)
4517 class LUNodeQueryvols(NoHooksLU):
4518 """Logical unit for getting volumes on node(s).
4522 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4523 _FIELDS_STATIC = utils.FieldSet("node")
4525 def CheckArguments(self):
4526 _CheckOutputFields(static=self._FIELDS_STATIC,
4527 dynamic=self._FIELDS_DYNAMIC,
4528 selected=self.op.output_fields)
4530 def ExpandNames(self):
4531 self.needed_locks = {}
4532 self.share_locks[locking.LEVEL_NODE] = 1
4533 if not self.op.nodes:
4534 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4536 self.needed_locks[locking.LEVEL_NODE] = \
4537 _GetWantedNodes(self, self.op.nodes)
4539 def Exec(self, feedback_fn):
4540 """Computes the list of nodes and their attributes.
4543 nodenames = self.owned_locks(locking.LEVEL_NODE)
4544 volumes = self.rpc.call_node_volumes(nodenames)
4546 ilist = self.cfg.GetAllInstancesInfo()
4547 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4550 for node in nodenames:
4551 nresult = volumes[node]
4554 msg = nresult.fail_msg
4556 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4559 node_vols = sorted(nresult.payload,
4560 key=operator.itemgetter("dev"))
4562 for vol in node_vols:
4563 node_output = []
4564 for field in self.op.output_fields:
4565 if field == "node":
4566 val = node
4567 elif field == "phys":
4568 val = vol["dev"]
4569 elif field == "vg":
4570 val = vol["vg"]
4571 elif field == "name":
4572 val = vol["name"]
4573 elif field == "size":
4574 val = int(float(vol["size"]))
4575 elif field == "instance":
4576 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4578 raise errors.ParameterError(field)
4579 node_output.append(str(val))
4581 output.append(node_output)
4586 class LUNodeQueryStorage(NoHooksLU):
4587 """Logical unit for getting information on storage units on node(s).
4590 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4593 def CheckArguments(self):
4594 _CheckOutputFields(static=self._FIELDS_STATIC,
4595 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4596 selected=self.op.output_fields)
4598 def ExpandNames(self):
4599 self.needed_locks = {}
4600 self.share_locks[locking.LEVEL_NODE] = 1
4603 self.needed_locks[locking.LEVEL_NODE] = \
4604 _GetWantedNodes(self, self.op.nodes)
4606 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4608 def Exec(self, feedback_fn):
4609 """Computes the list of nodes and their attributes.
4612 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4614 # Always get name to sort by
4615 if constants.SF_NAME in self.op.output_fields:
4616 fields = self.op.output_fields[:]
4618 fields = [constants.SF_NAME] + self.op.output_fields
4620 # Never ask for node or type as it's only known to the LU
4621 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4622 while extra in fields:
4623 fields.remove(extra)
4625 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4626 name_idx = field_idx[constants.SF_NAME]
4628 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4629 data = self.rpc.call_storage_list(self.nodes,
4630 self.op.storage_type, st_args,
4631 self.op.name, fields)
4635 for node in utils.NiceSort(self.nodes):
4636 nresult = data[node]
4640 msg = nresult.fail_msg
4642 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4645 rows = dict([(row[name_idx], row) for row in nresult.payload])
4647 for name in utils.NiceSort(rows.keys()):
4652 for field in self.op.output_fields:
4653 if field == constants.SF_NODE:
4655 elif field == constants.SF_TYPE:
4656 val = self.op.storage_type
4657 elif field in field_idx:
4658 val = row[field_idx[field]]
4660 raise errors.ParameterError(field)
4669 class _InstanceQuery(_QueryBase):
4670 FIELDS = query.INSTANCE_FIELDS
4672 def ExpandNames(self, lu):
4673 lu.needed_locks = {}
4674 lu.share_locks = _ShareAll()
4677 self.wanted = _GetWantedInstances(lu, self.names)
4679 self.wanted = locking.ALL_SET
4681 self.do_locking = (self.use_locking and
4682 query.IQ_LIVE in self.requested_data)
4684 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4685 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4686 lu.needed_locks[locking.LEVEL_NODE] = []
4687 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4689 self.do_grouplocks = (self.do_locking and
4690 query.IQ_NODES in self.requested_data)
4692 def DeclareLocks(self, lu, level):
4694 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4695 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4697 # Lock all groups used by instances optimistically; this requires going
4698 # via the node before it's locked, requiring verification later on
4699 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4701 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4702 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4703 elif level == locking.LEVEL_NODE:
4704 lu._LockInstancesNodes() # pylint: disable=W0212
4707 def _CheckGroupLocks(lu):
4708 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4709 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4711 # Check if node groups for locked instances are still correct
4712 for instance_name in owned_instances:
4713 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4715 def _GetQueryData(self, lu):
4716 """Computes the list of instances and their attributes.
4719 if self.do_grouplocks:
4720 self._CheckGroupLocks(lu)
4722 cluster = lu.cfg.GetClusterInfo()
4723 all_info = lu.cfg.GetAllInstancesInfo()
4725 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4727 instance_list = [all_info[name] for name in instance_names]
4728 nodes = frozenset(itertools.chain(*(inst.all_nodes
4729 for inst in instance_list)))
4730 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4733 wrongnode_inst = set()
4735 # Gather data as requested
4736 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4738 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4740 result = node_data[name]
4742 # offline nodes will be in both lists
4743 assert result.fail_msg
4744 offline_nodes.append(name)
4746 bad_nodes.append(name)
4747 elif result.payload:
4748 for inst in result.payload:
4749 if inst in all_info:
4750 if all_info[inst].primary_node == name:
4751 live_data.update(result.payload)
4753 wrongnode_inst.add(inst)
4755 # orphan instance; we don't list it here as we don't
4756 # handle this case yet in the output of instance listing
4757 logging.warning("Orphan instance '%s' found on node %s",
4759 # else no instance is alive
4763 if query.IQ_DISKUSAGE in self.requested_data:
4764 disk_usage = dict((inst.name,
4765 _ComputeDiskSize(inst.disk_template,
4766 [{constants.IDISK_SIZE: disk.size}
4767 for disk in inst.disks]))
4768 for inst in instance_list)
4772 if query.IQ_CONSOLE in self.requested_data:
4774 for inst in instance_list:
4775 if inst.name in live_data:
4776 # Instance is running
4777 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4779 consinfo[inst.name] = None
4780 assert set(consinfo.keys()) == set(instance_names)
4784 if query.IQ_NODES in self.requested_data:
4785 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4787 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4788 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4789 for uuid in set(map(operator.attrgetter("group"),
4795 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4796 disk_usage, offline_nodes, bad_nodes,
4797 live_data, wrongnode_inst, consinfo,
4801 class LUQuery(NoHooksLU):
4802 """Query for resources/items of a certain kind.
4805 # pylint: disable=W0142
4808 def CheckArguments(self):
4809 qcls = _GetQueryImplementation(self.op.what)
4811 self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)
4813 def ExpandNames(self):
4814 self.impl.ExpandNames(self)
4816 def DeclareLocks(self, level):
4817 self.impl.DeclareLocks(self, level)
4819 def Exec(self, feedback_fn):
4820 return self.impl.NewStyleQuery(self)
4823 class LUQueryFields(NoHooksLU):
4824 """Query for resources/items of a certain kind.
4827 # pylint: disable=W0142
4830 def CheckArguments(self):
4831 self.qcls = _GetQueryImplementation(self.op.what)
4833 def ExpandNames(self):
4834 self.needed_locks = {}
4836 def Exec(self, feedback_fn):
4837 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4840 class LUNodeModifyStorage(NoHooksLU):
4841 """Logical unit for modifying a storage volume on a node.
4846 def CheckArguments(self):
4847 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4849 storage_type = self.op.storage_type
4852 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4854 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4855 " modified" % storage_type,
4858 diff = set(self.op.changes.keys()) - modifiable
4860 raise errors.OpPrereqError("The following fields can not be modified for"
4861 " storage units of type '%s': %r" %
4862 (storage_type, list(diff)),
4865 def ExpandNames(self):
4866 self.needed_locks = {
4867 locking.LEVEL_NODE: self.op.node_name,
4870 def Exec(self, feedback_fn):
4871 """Computes the list of nodes and their attributes.
4874 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4875 result = self.rpc.call_storage_modify(self.op.node_name,
4876 self.op.storage_type, st_args,
4877 self.op.name, self.op.changes)
4878 result.Raise("Failed to modify storage unit '%s' on %s" %
4879 (self.op.name, self.op.node_name))
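# Editor's note (hypothetical example): for an LVM physical volume this LU
# would typically be called with storage_type=constants.ST_LVM_PV and
# changes={constants.SF_ALLOCATABLE: False}; CheckArguments rejects any key
# that is not listed for that type in constants.MODIFIABLE_STORAGE_FIELDS.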
4882 class LUNodeAdd(LogicalUnit):
4883 """Logical unit for adding node to the cluster.
4887 HTYPE = constants.HTYPE_NODE
4888 _NFLAGS = ["master_capable", "vm_capable"]
4890 def CheckArguments(self):
4891 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4892 # validate/normalize the node name
4893 self.hostname = netutils.GetHostname(name=self.op.node_name,
4894 family=self.primary_ip_family)
4895 self.op.node_name = self.hostname.name
4897 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4898 raise errors.OpPrereqError("Cannot readd the master node",
4901 if self.op.readd and self.op.group:
4902 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4903 " being readded", errors.ECODE_INVAL)
4905 def BuildHooksEnv(self):
4908 This will run on all nodes before, and on all nodes + the new node after.
4912 "OP_TARGET": self.op.node_name,
4913 "NODE_NAME": self.op.node_name,
4914 "NODE_PIP": self.op.primary_ip,
4915 "NODE_SIP": self.op.secondary_ip,
4916 "MASTER_CAPABLE": str(self.op.master_capable),
4917 "VM_CAPABLE": str(self.op.vm_capable),
4920 def BuildHooksNodes(self):
4921 """Build hooks nodes.
4924 # Exclude added node
4925 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4926 post_nodes = pre_nodes + [self.op.node_name, ]
4928 return (pre_nodes, post_nodes)
4930 def CheckPrereq(self):
4931 """Check prerequisites.
4934 - the new node is not already in the config
4936 - its parameters (single/dual homed) matches the cluster
4938 Any errors are signaled by raising errors.OpPrereqError.
4942 hostname = self.hostname
4943 node = hostname.name
4944 primary_ip = self.op.primary_ip = hostname.ip
4945 if self.op.secondary_ip is None:
4946 if self.primary_ip_family == netutils.IP6Address.family:
4947 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
4948 " IPv4 address must be given as secondary",
4949 errors.ECODE_INVAL)
4950 self.op.secondary_ip = primary_ip
4952 secondary_ip = self.op.secondary_ip
4953 if not netutils.IP4Address.IsValid(secondary_ip):
4954 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4955 " address" % secondary_ip, errors.ECODE_INVAL)
4957 node_list = cfg.GetNodeList()
4958 if not self.op.readd and node in node_list:
4959 raise errors.OpPrereqError("Node %s is already in the configuration" %
4960 node, errors.ECODE_EXISTS)
4961 elif self.op.readd and node not in node_list:
4962 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4965 self.changed_primary_ip = False
4967 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4968 if self.op.readd and node == existing_node_name:
4969 if existing_node.secondary_ip != secondary_ip:
4970 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4971 " address configuration as before",
4973 if existing_node.primary_ip != primary_ip:
4974 self.changed_primary_ip = True
4978 if (existing_node.primary_ip == primary_ip or
4979 existing_node.secondary_ip == primary_ip or
4980 existing_node.primary_ip == secondary_ip or
4981 existing_node.secondary_ip == secondary_ip):
4982 raise errors.OpPrereqError("New node ip address(es) conflict with"
4983 " existing node %s" % existing_node.name,
4984 errors.ECODE_NOTUNIQUE)
4986 # After this 'if' block, None is no longer a valid value for the
4987 # _capable op attributes
4989 old_node = self.cfg.GetNodeInfo(node)
4990 assert old_node is not None, "Can't retrieve locked node %s" % node
4991 for attr in self._NFLAGS:
4992 if getattr(self.op, attr) is None:
4993 setattr(self.op, attr, getattr(old_node, attr))
4995 for attr in self._NFLAGS:
4996 if getattr(self.op, attr) is None:
4997 setattr(self.op, attr, True)
4999 if self.op.readd and not self.op.vm_capable:
5000 pri, sec = cfg.GetNodeInstances(node)
5002 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5003 " flag set to false, but it already holds"
5004 " instances" % node,
5007 # check that the type of the node (single versus dual homed) is the
5008 # same as for the master
5009 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5010 master_singlehomed = myself.secondary_ip == myself.primary_ip
5011 newbie_singlehomed = secondary_ip == primary_ip
5012 if master_singlehomed != newbie_singlehomed:
5013 if master_singlehomed:
5014 raise errors.OpPrereqError("The master has no secondary ip but the"
5015 " new node has one",
5018 raise errors.OpPrereqError("The master has a secondary ip but the"
5019 " new node doesn't have one",
5022 # checks reachability
5023 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5024 raise errors.OpPrereqError("Node not reachable by ping",
5025 errors.ECODE_ENVIRON)
5027 if not newbie_singlehomed:
5028 # check reachability from my secondary ip to newbie's secondary ip
5029 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5030 source=myself.secondary_ip):
5031 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5032 " based ping to node daemon port",
5033 errors.ECODE_ENVIRON)
5040 if self.op.master_capable:
5041 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5043 self.master_candidate = False
5045 if self.op.readd:
5046 self.new_node = old_node
5047 else:
5048 node_group = cfg.LookupNodeGroup(self.op.group)
5049 self.new_node = objects.Node(name=node,
5050 primary_ip=primary_ip,
5051 secondary_ip=secondary_ip,
5052 master_candidate=self.master_candidate,
5053 offline=False, drained=False,
5054 group=node_group)
5056 if self.op.ndparams:
5057 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5059 def Exec(self, feedback_fn):
5060 """Adds the new node to the cluster.
5063 new_node = self.new_node
5064 node = new_node.name
5066 # We are adding a new node, so we assume it's powered
5067 new_node.powered = True
5069 # for re-adds, reset the offline/drained/master-candidate flags;
5070 # we need to reset here, otherwise offline would prevent RPC calls
5071 # later in the procedure; this also means that if the re-add
5072 # fails, we are left with a non-offlined, broken node
5074 new_node.drained = new_node.offline = False # pylint: disable=W0201
5075 self.LogInfo("Readding a node, the offline/drained flags were reset")
5076 # if we demote the node, we do cleanup later in the procedure
5077 new_node.master_candidate = self.master_candidate
5078 if self.changed_primary_ip:
5079 new_node.primary_ip = self.op.primary_ip
5081 # copy the master/vm_capable flags
5082 for attr in self._NFLAGS:
5083 setattr(new_node, attr, getattr(self.op, attr))
5085 # notify the user about any possible mc promotion
5086 if new_node.master_candidate:
5087 self.LogInfo("Node will be a master candidate")
5089 if self.op.ndparams:
5090 new_node.ndparams = self.op.ndparams
5092 new_node.ndparams = {}
5094 # check connectivity
5095 result = self.rpc.call_version([node])[node]
5096 result.Raise("Can't get version information from node %s" % node)
5097 if constants.PROTOCOL_VERSION == result.payload:
5098 logging.info("Communication to node %s fine, sw version %s match",
5099 node, result.payload)
5101 raise errors.OpExecError("Version mismatch master version %s,"
5102 " node version %s" %
5103 (constants.PROTOCOL_VERSION, result.payload))
5105 # Add node to our /etc/hosts, and add key to known_hosts
5106 if self.cfg.GetClusterInfo().modify_etc_hosts:
5107 master_node = self.cfg.GetMasterNode()
5108 result = self.rpc.call_etc_hosts_modify(master_node,
5109 constants.ETC_HOSTS_ADD,
5110 self.hostname.name,
5111 self.hostname.ip)
5112 result.Raise("Can't update hosts file with new host data")
5114 if new_node.secondary_ip != new_node.primary_ip:
5115 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5118 node_verify_list = [self.cfg.GetMasterNode()]
5119 node_verify_param = {
5120 constants.NV_NODELIST: ([node], {}),
5121 # TODO: do a node-net-test as well?
5124 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5125 self.cfg.GetClusterName())
5126 for verifier in node_verify_list:
5127 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5128 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5130 for failed in nl_payload:
5131 feedback_fn("ssh/hostname verification failed"
5132 " (checking from %s): %s" %
5133 (verifier, nl_payload[failed]))
5134 raise errors.OpExecError("ssh/hostname verification failed")
5136 if self.op.readd:
5137 _RedistributeAncillaryFiles(self)
5138 self.context.ReaddNode(new_node)
5139 # make sure we redistribute the config
5140 self.cfg.Update(new_node, feedback_fn)
5141 # and make sure the new node will not have old files around
5142 if not new_node.master_candidate:
5143 result = self.rpc.call_node_demote_from_mc(new_node.name)
5144 msg = result.fail_msg
5145 if msg:
5146 self.LogWarning("Node failed to demote itself from master"
5147 " candidate status: %s" % msg)
5148 else:
5149 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5150 additional_vm=self.op.vm_capable)
5151 self.context.AddNode(new_node, self.proc.GetECId())
5154 class LUNodeSetParams(LogicalUnit):
5155 """Modifies the parameters of a node.
5157 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5158 to the node role (as _ROLE_*)
5159 @cvar _R2F: a dictionary from node role to tuples of flags
5160 @cvar _FLAGS: a list of attribute names corresponding to the flags
5163 HPATH = "node-modify"
5164 HTYPE = constants.HTYPE_NODE
5166 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5167 _F2R = {
5168 (True, False, False): _ROLE_CANDIDATE,
5169 (False, True, False): _ROLE_DRAINED,
5170 (False, False, True): _ROLE_OFFLINE,
5171 (False, False, False): _ROLE_REGULAR,
5172 }
5173 _R2F = dict((v, k) for k, v in _F2R.items())
5174 _FLAGS = ["master_candidate", "drained", "offline"]
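# Editor's note: the flag tuples above are ordered (master_candidate,
# drained, offline), so for example _F2R[(False, False, True)] is
# _ROLE_OFFLINE and _R2F[_ROLE_REGULAR] is (False, False, False).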
5176 def CheckArguments(self):
5177 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5178 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5179 self.op.master_capable, self.op.vm_capable,
5180 self.op.secondary_ip, self.op.ndparams]
5181 if all_mods.count(None) == len(all_mods):
5182 raise errors.OpPrereqError("Please pass at least one modification",
5184 if all_mods.count(True) > 1:
5185 raise errors.OpPrereqError("Can't set the node into more than one"
5186 " state at the same time",
5189 # Boolean value that tells us whether we might be demoting from MC
5190 self.might_demote = (self.op.master_candidate == False or
5191 self.op.offline == True or
5192 self.op.drained == True or
5193 self.op.master_capable == False)
5195 if self.op.secondary_ip:
5196 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5197 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5198 " address" % self.op.secondary_ip,
5201 self.lock_all = self.op.auto_promote and self.might_demote
5202 self.lock_instances = self.op.secondary_ip is not None
5204 def ExpandNames(self):
5205 if self.lock_all:
5206 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5207 else:
5208 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5210 if self.lock_instances:
5211 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5213 def DeclareLocks(self, level):
5214 # If we have locked all instances, before waiting to lock nodes, release
5215 # all the ones living on nodes unrelated to the current operation.
5216 if level == locking.LEVEL_NODE and self.lock_instances:
5217 self.affected_instances = []
5218 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5219 instances_keep = []
5221 # Build list of instances to release
5222 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5223 for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5224 if (instance.disk_template in constants.DTS_INT_MIRROR and
5225 self.op.node_name in instance.all_nodes):
5226 instances_keep.append(instance_name)
5227 self.affected_instances.append(instance)
5229 _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5231 assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5232 set(instances_keep))
5234 def BuildHooksEnv(self):
5237 This runs on the master node.
5241 "OP_TARGET": self.op.node_name,
5242 "MASTER_CANDIDATE": str(self.op.master_candidate),
5243 "OFFLINE": str(self.op.offline),
5244 "DRAINED": str(self.op.drained),
5245 "MASTER_CAPABLE": str(self.op.master_capable),
5246 "VM_CAPABLE": str(self.op.vm_capable),
5249 def BuildHooksNodes(self):
5250 """Build hooks nodes.
5253 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5256 def CheckPrereq(self):
5257 """Check prerequisites.
5259 This only checks the instance list against the existing names.
5262 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5264 if (self.op.master_candidate is not None or
5265 self.op.drained is not None or
5266 self.op.offline is not None):
5267 # we can't change the master's node flags
5268 if self.op.node_name == self.cfg.GetMasterNode():
5269 raise errors.OpPrereqError("The master role can be changed"
5270 " only via master-failover",
5273 if self.op.master_candidate and not node.master_capable:
5274 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5275 " it a master candidate" % node.name,
5278 if self.op.vm_capable == False:
5279 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5281 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5282 " the vm_capable flag" % node.name,
5285 if node.master_candidate and self.might_demote and not self.lock_all:
5286 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5287 # check if after removing the current node, we're missing master
5289 (mc_remaining, mc_should, _) = \
5290 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5291 if mc_remaining < mc_should:
5292 raise errors.OpPrereqError("Not enough master candidates, please"
5293 " pass auto promote option to allow"
5294 " promotion", errors.ECODE_STATE)
5296 self.old_flags = old_flags = (node.master_candidate,
5297 node.drained, node.offline)
5298 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5299 self.old_role = old_role = self._F2R[old_flags]
5301 # Check for ineffective changes
5302 for attr in self._FLAGS:
5303 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5304 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5305 setattr(self.op, attr, None)
5307 # Past this point, any flag change to False means a transition
5308 # away from the respective state, as only real changes are kept
5310 # TODO: We might query the real power state if it supports OOB
5311 if _SupportsOob(self.cfg, node):
5312 if self.op.offline is False and not (node.powered or
5313 self.op.powered == True):
5314 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5315 " offline status can be reset") %
5317 elif self.op.powered is not None:
5318 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5319 " as it does not support out-of-band"
5320 " handling") % self.op.node_name)
5322 # If we're being deofflined/drained, we'll MC ourself if needed
5323 if (self.op.drained == False or self.op.offline == False or
5324 (self.op.master_capable and not node.master_capable)):
5325 if _DecideSelfPromotion(self):
5326 self.op.master_candidate = True
5327 self.LogInfo("Auto-promoting node to master candidate")
5329 # If we're no longer master capable, we'll demote ourselves from MC
5330 if self.op.master_capable == False and node.master_candidate:
5331 self.LogInfo("Demoting from master candidate")
5332 self.op.master_candidate = False
5335 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5336 if self.op.master_candidate:
5337 new_role = self._ROLE_CANDIDATE
5338 elif self.op.drained:
5339 new_role = self._ROLE_DRAINED
5340 elif self.op.offline:
5341 new_role = self._ROLE_OFFLINE
5342 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5343 # False is still in new flags, which means we're un-setting (the
5345 new_role = self._ROLE_REGULAR
5346 else: # no new flags, nothing, keep old role
5349 self.new_role = new_role
5351 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5352 # Trying to transition out of offline status
5353 result = self.rpc.call_version([node.name])[node.name]
5354 if result.fail_msg:
5355 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5356 " to report its version: %s" %
5357 (node.name, result.fail_msg),
5358 errors.ECODE_STATE)
5359 else:
5360 self.LogWarning("Transitioning node from offline to online state"
5361 " without using re-add. Please make sure the node"
5362 " is healthy!")
5364 if self.op.secondary_ip:
5365 # Ok even without locking, because this can't be changed by any LU
5366 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5367 master_singlehomed = master.secondary_ip == master.primary_ip
5368 if master_singlehomed and self.op.secondary_ip:
5369 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5370 " homed cluster", errors.ECODE_INVAL)
5373 if self.affected_instances:
5374 raise errors.OpPrereqError("Cannot change secondary ip: offline"
5375 " node has instances (%s) configured"
5376 " to use it" % self.affected_instances)
5378 # On online nodes, check that no instances are running, and that
5379 # the node has the new ip and we can reach it.
5380 for instance in self.affected_instances:
5381 _CheckInstanceDown(self, instance, "cannot change secondary ip")
5383 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5384 if master.name != node.name:
5385 # check reachability from master secondary ip to new secondary ip
5386 if not netutils.TcpPing(self.op.secondary_ip,
5387 constants.DEFAULT_NODED_PORT,
5388 source=master.secondary_ip):
5389 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5390 " based ping to node daemon port",
5391 errors.ECODE_ENVIRON)
5393 if self.op.ndparams:
5394 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5395 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5396 self.new_ndparams = new_ndparams
5398 def Exec(self, feedback_fn):
5402 node = self.node
5403 old_role = self.old_role
5404 new_role = self.new_role
5406 result = []
5408 if self.op.ndparams:
5409 node.ndparams = self.new_ndparams
5411 if self.op.powered is not None:
5412 node.powered = self.op.powered
5414 for attr in ["master_capable", "vm_capable"]:
5415 val = getattr(self.op, attr)
5417 setattr(node, attr, val)
5418 result.append((attr, str(val)))
5420 if new_role != old_role:
5421 # Tell the node to demote itself, if no longer MC and not offline
5422 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5423 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5425 self.LogWarning("Node failed to demote itself: %s", msg)
5427 new_flags = self._R2F[new_role]
5428 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5430 result.append((desc, str(nf)))
5431 (node.master_candidate, node.drained, node.offline) = new_flags
5433 # we locked all nodes, we adjust the CP before updating this node
5435 _AdjustCandidatePool(self, [node.name])
5437 if self.op.secondary_ip:
5438 node.secondary_ip = self.op.secondary_ip
5439 result.append(("secondary_ip", self.op.secondary_ip))
5441 # this will trigger configuration file update, if needed
5442 self.cfg.Update(node, feedback_fn)
5444 # this will trigger job queue propagation or cleanup if the mc
5446 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5447 self.context.ReaddNode(node)
5449 return result
5452 class LUNodePowercycle(NoHooksLU):
5453 """Powercycles a node.
5458 def CheckArguments(self):
5459 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5460 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5461 raise errors.OpPrereqError("The node is the master and the force"
5462 " parameter was not set",
5465 def ExpandNames(self):
5466 """Locking for PowercycleNode.
5468 This is a last-resort option and shouldn't block on other
5469 jobs. Therefore, we grab no locks.
5472 self.needed_locks = {}
5474 def Exec(self, feedback_fn):
5478 result = self.rpc.call_node_powercycle(self.op.node_name,
5479 self.cfg.GetHypervisorType())
5480 result.Raise("Failed to schedule the reboot")
5481 return result.payload
5484 class LUClusterQuery(NoHooksLU):
5485 """Query cluster configuration.
5490 def ExpandNames(self):
5491 self.needed_locks = {}
5493 def Exec(self, feedback_fn):
5494 """Return cluster config.
5497 cluster = self.cfg.GetClusterInfo()
5499 os_hvp = {}
5500 # Filter just for enabled hypervisors
5501 for os_name, hv_dict in cluster.os_hvp.items():
5502 os_hvp[os_name] = {}
5503 for hv_name, hv_params in hv_dict.items():
5504 if hv_name in cluster.enabled_hypervisors:
5505 os_hvp[os_name][hv_name] = hv_params
5507 # Convert ip_family to ip_version
5508 primary_ip_version = constants.IP4_VERSION
5509 if cluster.primary_ip_family == netutils.IP6Address.family:
5510 primary_ip_version = constants.IP6_VERSION
5512 result = {
5513 "software_version": constants.RELEASE_VERSION,
5514 "protocol_version": constants.PROTOCOL_VERSION,
5515 "config_version": constants.CONFIG_VERSION,
5516 "os_api_version": max(constants.OS_API_VERSIONS),
5517 "export_version": constants.EXPORT_VERSION,
5518 "architecture": (platform.architecture()[0], platform.machine()),
5519 "name": cluster.cluster_name,
5520 "master": cluster.master_node,
5521 "default_hypervisor": cluster.enabled_hypervisors[0],
5522 "enabled_hypervisors": cluster.enabled_hypervisors,
5523 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5524 for hypervisor_name in cluster.enabled_hypervisors]),
5525 "os_hvp": os_hvp,
5526 "beparams": cluster.beparams,
5527 "osparams": cluster.osparams,
5528 "nicparams": cluster.nicparams,
5529 "ndparams": cluster.ndparams,
5530 "candidate_pool_size": cluster.candidate_pool_size,
5531 "master_netdev": cluster.master_netdev,
5532 "volume_group_name": cluster.volume_group_name,
5533 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5534 "file_storage_dir": cluster.file_storage_dir,
5535 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5536 "maintain_node_health": cluster.maintain_node_health,
5537 "ctime": cluster.ctime,
5538 "mtime": cluster.mtime,
5539 "uuid": cluster.uuid,
5540 "tags": list(cluster.GetTags()),
5541 "uid_pool": cluster.uid_pool,
5542 "default_iallocator": cluster.default_iallocator,
5543 "reserved_lvs": cluster.reserved_lvs,
5544 "primary_ip_version": primary_ip_version,
5545 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5546 "hidden_os": cluster.hidden_os,
5547 "blacklisted_os": cluster.blacklisted_os,
5548 }
5550 return result
5553 class LUClusterConfigQuery(NoHooksLU):
5554 """Return configuration values.
5558 _FIELDS_DYNAMIC = utils.FieldSet()
5559 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5560 "watcher_pause", "volume_group_name")
5562 def CheckArguments(self):
5563 _CheckOutputFields(static=self._FIELDS_STATIC,
5564 dynamic=self._FIELDS_DYNAMIC,
5565 selected=self.op.output_fields)
5567 def ExpandNames(self):
5568 self.needed_locks = {}
5570 def Exec(self, feedback_fn):
5571 """Dump a representation of the cluster config to the standard output.
5573 """
5574 values = []
5575 for field in self.op.output_fields:
5576 if field == "cluster_name":
5577 entry = self.cfg.GetClusterName()
5578 elif field == "master_node":
5579 entry = self.cfg.GetMasterNode()
5580 elif field == "drain_flag":
5581 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5582 elif field == "watcher_pause":
5583 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5584 elif field == "volume_group_name":
5585 entry = self.cfg.GetVGName()
5587 raise errors.ParameterError(field)
5588 values.append(entry)
5589 return values
5592 class LUInstanceActivateDisks(NoHooksLU):
5593 """Bring up an instance's disks.
5598 def ExpandNames(self):
5599 self._ExpandAndLockInstance()
5600 self.needed_locks[locking.LEVEL_NODE] = []
5601 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5603 def DeclareLocks(self, level):
5604 if level == locking.LEVEL_NODE:
5605 self._LockInstancesNodes()
5607 def CheckPrereq(self):
5608 """Check prerequisites.
5610 This checks that the instance is in the cluster.
5613 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5614 assert self.instance is not None, \
5615 "Cannot retrieve locked instance %s" % self.op.instance_name
5616 _CheckNodeOnline(self, self.instance.primary_node)
5618 def Exec(self, feedback_fn):
5619 """Activate the disks.
5622 disks_ok, disks_info = \
5623 _AssembleInstanceDisks(self, self.instance,
5624 ignore_size=self.op.ignore_size)
5625 if not disks_ok:
5626 raise errors.OpExecError("Cannot activate block devices")
5628 return disks_info
5631 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5632                            ignore_size=False):
5633 """Prepare the block devices for an instance.
5635 This sets up the block devices on all nodes.
5637 @type lu: L{LogicalUnit}
5638 @param lu: the logical unit on whose behalf we execute
5639 @type instance: L{objects.Instance}
5640 @param instance: the instance for whose disks we assemble
5641 @type disks: list of L{objects.Disk} or None
5642 @param disks: which disks to assemble (or all, if None)
5643 @type ignore_secondaries: boolean
5644 @param ignore_secondaries: if true, errors on secondary nodes
5645 won't result in an error return from the function
5646 @type ignore_size: boolean
5647 @param ignore_size: if true, the current known size of the disk
5648 will not be used during the disk activation, useful for cases
5649 when the size is wrong
5650 @return: False if the operation failed, otherwise a list of
5651 (host, instance_visible_name, node_visible_name)
5652 with the mapping from node devices to instance devices
5654 """
5655 device_info = []
5656 disks_ok = True
5657 iname = instance.name
5658 disks = _ExpandCheckDisks(instance, disks)
5660 # With the two-pass mechanism we try to reduce the window of
5661 # opportunity for the race condition of switching DRBD to primary
5662 # before handshaking occurred, but we do not eliminate it
5664 # The proper fix would be to wait (with some limits) until the
5665 # connection has been made and drbd transitions from WFConnection
5666 # into any other network-connected state (Connected, SyncTarget,
5669 # 1st pass, assemble on all nodes in secondary mode
5670 for idx, inst_disk in enumerate(disks):
5671 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5673 node_disk = node_disk.Copy()
5674 node_disk.UnsetSize()
5675 lu.cfg.SetDiskID(node_disk, node)
5676 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5677 msg = result.fail_msg
5679 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5680 " (is_primary=False, pass=1): %s",
5681 inst_disk.iv_name, node, msg)
5682 if not ignore_secondaries:
5685 # FIXME: race condition on drbd migration to primary
5687 # 2nd pass, do only the primary node
5688 for idx, inst_disk in enumerate(disks):
5691 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5692 if node != instance.primary_node:
5695 node_disk = node_disk.Copy()
5696 node_disk.UnsetSize()
5697 lu.cfg.SetDiskID(node_disk, node)
5698 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5699 msg = result.fail_msg
5701 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5702 " (is_primary=True, pass=2): %s",
5703 inst_disk.iv_name, node, msg)
5706 dev_path = result.payload
5708 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5710 # leave the disks configured for the primary node
5711 # this is a workaround that would be fixed better by
5712 # improving the logical/physical id handling
5714 lu.cfg.SetDiskID(disk, instance.primary_node)
5716 return disks_ok, device_info
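# Illustrative usage sketch (not part of the original code): callers treat the
# return value as a (success, mapping) pair, e.g.
#
#   disks_ok, device_info = _AssembleInstanceDisks(self, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     feedback_fn("%s: %s visible as %s" % (node, iv_name, dev_path))
#
# LUInstanceActivateDisks above follows this pattern and hands device_info
# back to the opcode caller.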
5719 def _StartInstanceDisks(lu, instance, force):
5720 """Start the disks of an instance.
5723 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5724 ignore_secondaries=force)
5726 _ShutdownInstanceDisks(lu, instance)
5727 if force is not None and not force:
5728 lu.proc.LogWarning("", hint="If the message above refers to a"
5730 " you can retry the operation using '--force'.")
5731 raise errors.OpExecError("Disk consistency error")
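# Note: callers in this module pass force=None for purely internal disk
# activations (e.g. reinstall and rename); assembly failures then still abort
# the operation, but the "--force" hint above is suppressed because there is
# no user-visible force flag to retry with.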
5734 class LUInstanceDeactivateDisks(NoHooksLU):
5735 """Shut down an instance's disks.
5740 def ExpandNames(self):
5741 self._ExpandAndLockInstance()
5742 self.needed_locks[locking.LEVEL_NODE] = []
5743 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5745 def DeclareLocks(self, level):
5746 if level == locking.LEVEL_NODE:
5747 self._LockInstancesNodes()
5749 def CheckPrereq(self):
5750 """Check prerequisites.
5752 This checks that the instance is in the cluster.
5755 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5756 assert self.instance is not None, \
5757 "Cannot retrieve locked instance %s" % self.op.instance_name
5759 def Exec(self, feedback_fn):
5760 """Deactivate the disks.
5763 instance = self.instance
5765 _ShutdownInstanceDisks(self, instance)
5767 _SafeShutdownInstanceDisks(self, instance)
5770 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5771 """Shut down block devices of an instance.
5773 This function checks that the instance is not running before calling
5774 _ShutdownInstanceDisks.
5777 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5778 _ShutdownInstanceDisks(lu, instance, disks=disks)
5781 def _ExpandCheckDisks(instance, disks):
5782 """Return the instance disks selected by the disks list
5784 @type disks: list of L{objects.Disk} or None
5785 @param disks: selected disks
5786 @rtype: list of L{objects.Disk}
5787 @return: selected instance disks to act on
5791 return instance.disks
5793 if not set(disks).issubset(instance.disks):
5794 raise errors.ProgrammerError("Can only act on disks belonging to the"
5799 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5800 """Shut down block devices of an instance.
5802 This does the shutdown on all nodes of the instance.
5804 If ignore_primary is false, errors on the primary node are
5809 disks = _ExpandCheckDisks(instance, disks)
5812 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5813 lu.cfg.SetDiskID(top_disk, node)
5814 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5815 msg = result.fail_msg
5817 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5818 disk.iv_name, node, msg)
5819 if ((node == instance.primary_node and not ignore_primary) or
5820 (node != instance.primary_node and not result.offline)):
5825 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5826 """Checks if a node has enough free memory.
5828 This function checks if a given node has the needed amount of free
5829 memory. In case the node has less memory or we cannot get the
5830 information from the node, this function raises an OpPrereqError
5833 @type lu: C{LogicalUnit}
5834 @param lu: a logical unit from which we get configuration data
5836 @param node: the node to check
5837 @type reason: C{str}
5838 @param reason: string to use in the error message
5839 @type requested: C{int}
5840 @param requested: the amount of memory in MiB to check for
5841 @type hypervisor_name: C{str}
5842 @param hypervisor_name: the hypervisor to ask for memory stats
5843 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5844 we cannot check the node
5847 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5848 nodeinfo[node].Raise("Can't get data from node %s" % node,
5849 prereq=True, ecode=errors.ECODE_ENVIRON)
5850 free_mem = nodeinfo[node].payload.get("memory_free", None)
5851 if not isinstance(free_mem, int):
5852 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5853 " was '%s'" % (node, free_mem),
5854 errors.ECODE_ENVIRON)
5855 if requested > free_mem:
5856 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5857 " needed %s MiB, available %s MiB" %
5858 (node, reason, requested, free_mem),
5862 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5863 """Checks if nodes have enough free disk space in all the VGs.
5865 This function checks if all given nodes have the needed amount of
5866 free disk. In case any node has less disk or we cannot get the
5867 information from the node, this function raises an OpPrereqError
5870 @type lu: C{LogicalUnit}
5871 @param lu: a logical unit from which we get configuration data
5872 @type nodenames: C{list}
5873 @param nodenames: the list of node names to check
5874 @type req_sizes: C{dict}
5875 @param req_sizes: the hash of vg and corresponding amount of disk in
5877 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5878 or we cannot check the node
5881 for vg, req_size in req_sizes.items():
5882 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
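# Illustrative example (the VG name is made up): req_sizes maps each volume
# group to the space that must be free on every node, e.g.
#
#   _CheckNodesFreeDiskPerVG(self, nodenames, {"xenvg": 10 * 1024})
#
# which performs one 10 GiB _CheckNodesFreeDiskOnVG check for that group.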
5885 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5886 """Checks if nodes have enough free disk space in the specified VG.
5888 This function checks if all given nodes have the needed amount of
5889 free disk. In case any node has less disk or we cannot get the
5890 information from the node, this function raises an OpPrereqError
5893 @type lu: C{LogicalUnit}
5894 @param lu: a logical unit from which we get configuration data
5895 @type nodenames: C{list}
5896 @param nodenames: the list of node names to check
5898 @param vg: the volume group to check
5899 @type requested: C{int}
5900 @param requested: the amount of disk in MiB to check for
5901 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5902 or we cannot check the node
5905 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5906 for node in nodenames:
5907 info = nodeinfo[node]
5908 info.Raise("Cannot get current information from node %s" % node,
5909 prereq=True, ecode=errors.ECODE_ENVIRON)
5910 vg_free = info.payload.get("vg_free", None)
5911 if not isinstance(vg_free, int):
5912 raise errors.OpPrereqError("Can't compute free disk space on node"
5913 " %s for vg %s, result was '%s'" %
5914 (node, vg, vg_free), errors.ECODE_ENVIRON)
5915 if requested > vg_free:
5916 raise errors.OpPrereqError("Not enough disk space on target node %s"
5917 " vg %s: required %d MiB, available %d MiB" %
5918 (node, vg, requested, vg_free),
5922 class LUInstanceStartup(LogicalUnit):
5923 """Starts an instance.
5926 HPATH = "instance-start"
5927 HTYPE = constants.HTYPE_INSTANCE
5930 def CheckArguments(self):
5932 if self.op.beparams:
5933 # fill the beparams dict
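# ForceDictType validates (and where possible coerces) the user-supplied
# backend parameter values against the declared parameter types before they
# are used for this start request.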
5934 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5936 def ExpandNames(self):
5937 self._ExpandAndLockInstance()
5939 def BuildHooksEnv(self):
5942 This runs on master, primary and secondary nodes of the instance.
5946 "FORCE": self.op.force,
5949 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5953 def BuildHooksNodes(self):
5954 """Build hooks nodes.
5957 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5960 def CheckPrereq(self):
5961 """Check prerequisites.
5963 This checks that the instance is in the cluster.
5966 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5967 assert self.instance is not None, \
5968 "Cannot retrieve locked instance %s" % self.op.instance_name
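# When one-off hypervisor parameters are supplied with the start request,
# they are merged on top of the cluster/instance defaults below and the
# resulting set is validated both syntactically and on the instance's nodes.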
5971 if self.op.hvparams:
5972 # check hypervisor parameter syntax (locally)
5973 cluster = self.cfg.GetClusterInfo()
5974 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5975 filled_hvp = cluster.FillHV(instance)
5976 filled_hvp.update(self.op.hvparams)
5977 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5978 hv_type.CheckParameterSyntax(filled_hvp)
5979 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5981 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5983 if self.primary_offline and self.op.ignore_offline_nodes:
5984 self.proc.LogWarning("Ignoring offline primary node")
5986 if self.op.hvparams or self.op.beparams:
5987 self.proc.LogWarning("Overridden parameters are ignored")
5989 _CheckNodeOnline(self, instance.primary_node)
5991 bep = self.cfg.GetClusterInfo().FillBE(instance)
5993 # check bridges existence
5994 _CheckInstanceBridgesExist(self, instance)
5996 remote_info = self.rpc.call_instance_info(instance.primary_node,
5998 instance.hypervisor)
5999 remote_info.Raise("Error checking node %s" % instance.primary_node,
6000 prereq=True, ecode=errors.ECODE_ENVIRON)
6001 if not remote_info.payload: # not running already
6002 _CheckNodeFreeMemory(self, instance.primary_node,
6003 "starting instance %s" % instance.name,
6004 bep[constants.BE_MEMORY], instance.hypervisor)
6006 def Exec(self, feedback_fn):
6007 """Start the instance.
6010 instance = self.instance
6011 force = self.op.force
6013 if not self.op.no_remember:
6014 self.cfg.MarkInstanceUp(instance.name)
6016 if self.primary_offline:
6017 assert self.op.ignore_offline_nodes
6018 self.proc.LogInfo("Primary node offline, marked instance as started")
6020 node_current = instance.primary_node
6022 _StartInstanceDisks(self, instance, force)
6024 result = self.rpc.call_instance_start(node_current, instance,
6025 self.op.hvparams, self.op.beparams,
6026 self.op.startup_paused)
6027 msg = result.fail_msg
6029 _ShutdownInstanceDisks(self, instance)
6030 raise errors.OpExecError("Could not start instance: %s" % msg)
6033 class LUInstanceReboot(LogicalUnit):
6034 """Reboot an instance.
6037 HPATH = "instance-reboot"
6038 HTYPE = constants.HTYPE_INSTANCE
6041 def ExpandNames(self):
6042 self._ExpandAndLockInstance()
6044 def BuildHooksEnv(self):
6047 This runs on master, primary and secondary nodes of the instance.
6051 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6052 "REBOOT_TYPE": self.op.reboot_type,
6053 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6056 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6060 def BuildHooksNodes(self):
6061 """Build hooks nodes.
6064 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6067 def CheckPrereq(self):
6068 """Check prerequisites.
6070 This checks that the instance is in the cluster.
6073 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6074 assert self.instance is not None, \
6075 "Cannot retrieve locked instance %s" % self.op.instance_name
6077 _CheckNodeOnline(self, instance.primary_node)
6079 # check bridges existence
6080 _CheckInstanceBridgesExist(self, instance)
6082 def Exec(self, feedback_fn):
6083 """Reboot the instance.
6086 instance = self.instance
6087 ignore_secondaries = self.op.ignore_secondaries
6088 reboot_type = self.op.reboot_type
6090 remote_info = self.rpc.call_instance_info(instance.primary_node,
6092 instance.hypervisor)
6093 remote_info.Raise("Error checking node %s" % instance.primary_node)
6094 instance_running = bool(remote_info.payload)
6096 node_current = instance.primary_node
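# A soft or hard reboot of a running instance is delegated to the hypervisor
# on the primary node; in all other cases (full reboot, or the instance is
# not running) the instance is stopped, its disks re-assembled and it is
# started again from scratch.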
6098 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6099 constants.INSTANCE_REBOOT_HARD]:
6100 for disk in instance.disks:
6101 self.cfg.SetDiskID(disk, node_current)
6102 result = self.rpc.call_instance_reboot(node_current, instance,
6104 self.op.shutdown_timeout)
6105 result.Raise("Could not reboot instance")
6107 if instance_running:
6108 result = self.rpc.call_instance_shutdown(node_current, instance,
6109 self.op.shutdown_timeout)
6110 result.Raise("Could not shutdown instance for full reboot")
6111 _ShutdownInstanceDisks(self, instance)
6113 self.LogInfo("Instance %s was already stopped, starting now",
6115 _StartInstanceDisks(self, instance, ignore_secondaries)
6116 result = self.rpc.call_instance_start(node_current, instance,
6118 msg = result.fail_msg
6120 _ShutdownInstanceDisks(self, instance)
6121 raise errors.OpExecError("Could not start instance for"
6122 " full reboot: %s" % msg)
6124 self.cfg.MarkInstanceUp(instance.name)
6127 class LUInstanceShutdown(LogicalUnit):
6128 """Shut down an instance.
6131 HPATH = "instance-stop"
6132 HTYPE = constants.HTYPE_INSTANCE
6135 def ExpandNames(self):
6136 self._ExpandAndLockInstance()
6138 def BuildHooksEnv(self):
6141 This runs on master, primary and secondary nodes of the instance.
6144 env = _BuildInstanceHookEnvByObject(self, self.instance)
6145 env["TIMEOUT"] = self.op.timeout
6148 def BuildHooksNodes(self):
6149 """Build hooks nodes.
6152 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6155 def CheckPrereq(self):
6156 """Check prerequisites.
6158 This checks that the instance is in the cluster.
6161 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6162 assert self.instance is not None, \
6163 "Cannot retrieve locked instance %s" % self.op.instance_name
6165 self.primary_offline = \
6166 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6168 if self.primary_offline and self.op.ignore_offline_nodes:
6169 self.proc.LogWarning("Ignoring offline primary node")
6171 _CheckNodeOnline(self, self.instance.primary_node)
6173 def Exec(self, feedback_fn):
6174 """Shut down the instance.
6177 instance = self.instance
6178 node_current = instance.primary_node
6179 timeout = self.op.timeout
6181 if not self.op.no_remember:
6182 self.cfg.MarkInstanceDown(instance.name)
6184 if self.primary_offline:
6185 assert self.op.ignore_offline_nodes
6186 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6188 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6189 msg = result.fail_msg
6191 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6193 _ShutdownInstanceDisks(self, instance)
6196 class LUInstanceReinstall(LogicalUnit):
6197 """Reinstall an instance.
6200 HPATH = "instance-reinstall"
6201 HTYPE = constants.HTYPE_INSTANCE
6204 def ExpandNames(self):
6205 self._ExpandAndLockInstance()
6207 def BuildHooksEnv(self):
6210 This runs on master, primary and secondary nodes of the instance.
6213 return _BuildInstanceHookEnvByObject(self, self.instance)
6215 def BuildHooksNodes(self):
6216 """Build hooks nodes.
6219 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6222 def CheckPrereq(self):
6223 """Check prerequisites.
6225 This checks that the instance is in the cluster and is not running.
6228 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6229 assert instance is not None, \
6230 "Cannot retrieve locked instance %s" % self.op.instance_name
6231 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6232 " offline, cannot reinstall")
6233 for node in instance.secondary_nodes:
6234 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6235 " cannot reinstall")
6237 if instance.disk_template == constants.DT_DISKLESS:
6238 raise errors.OpPrereqError("Instance '%s' has no disks" %
6239 self.op.instance_name,
6241 _CheckInstanceDown(self, instance, "cannot reinstall")
6243 if self.op.os_type is not None:
6245 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6246 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6247 instance_os = self.op.os_type
6249 instance_os = instance.os
6251 nodelist = list(instance.all_nodes)
6253 if self.op.osparams:
6254 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6255 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6256 self.os_inst = i_osdict # the new dict (without defaults)
6260 self.instance = instance
6262 def Exec(self, feedback_fn):
6263 """Reinstall the instance.
6266 inst = self.instance
6268 if self.op.os_type is not None:
6269 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6270 inst.os = self.op.os_type
6271 # Write to configuration
6272 self.cfg.Update(inst, feedback_fn)
6274 _StartInstanceDisks(self, inst, None)
6276 feedback_fn("Running the instance OS create scripts...")
6277 # FIXME: pass debug option from opcode to backend
6278 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6279 self.op.debug_level,
6280 osparams=self.os_inst)
6281 result.Raise("Could not install OS for instance %s on node %s" %
6282 (inst.name, inst.primary_node))
6284 _ShutdownInstanceDisks(self, inst)
6287 class LUInstanceRecreateDisks(LogicalUnit):
6288 """Recreate an instance's missing disks.
6291 HPATH = "instance-recreate-disks"
6292 HTYPE = constants.HTYPE_INSTANCE
6295 def CheckArguments(self):
6296 # normalise the disk list
6297 self.op.disks = sorted(frozenset(self.op.disks))
6299 def ExpandNames(self):
6300 self._ExpandAndLockInstance()
6301 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6303 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6304 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6306 self.needed_locks[locking.LEVEL_NODE] = []
6308 def DeclareLocks(self, level):
6309 if level == locking.LEVEL_NODE:
6310 # if we replace the nodes, we only need to lock the old primary,
6311 # otherwise we need to lock all nodes for disk re-creation
6312 primary_only = bool(self.op.nodes)
6313 self._LockInstancesNodes(primary_only=primary_only)
6315 def BuildHooksEnv(self):
6318 This runs on master, primary and secondary nodes of the instance.
6321 return _BuildInstanceHookEnvByObject(self, self.instance)
6323 def BuildHooksNodes(self):
6324 """Build hooks nodes.
6327 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6330 def CheckPrereq(self):
6331 """Check prerequisites.
6333 This checks that the instance is in the cluster and is not running.
6336 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6337 assert instance is not None, \
6338 "Cannot retrieve locked instance %s" % self.op.instance_name
6340 if len(self.op.nodes) != len(instance.all_nodes):
6341 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6342 " %d replacement nodes were specified" %
6343 (instance.name, len(instance.all_nodes),
6344 len(self.op.nodes)),
6346 assert instance.disk_template != constants.DT_DRBD8 or \
6347 len(self.op.nodes) == 2
6348 assert instance.disk_template != constants.DT_PLAIN or \
6349 len(self.op.nodes) == 1
6350 primary_node = self.op.nodes[0]
6352 primary_node = instance.primary_node
6353 _CheckNodeOnline(self, primary_node)
6355 if instance.disk_template == constants.DT_DISKLESS:
6356 raise errors.OpPrereqError("Instance '%s' has no disks" %
6357 self.op.instance_name, errors.ECODE_INVAL)
6358 # if we replace nodes *and* the old primary is offline, we don't
6360 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6361 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6362 if not (self.op.nodes and old_pnode.offline):
6363 _CheckInstanceDown(self, instance, "cannot recreate disks")
6365 if not self.op.disks:
6366 self.op.disks = range(len(instance.disks))
6368 for idx in self.op.disks:
6369 if idx >= len(instance.disks):
6370 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6372 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6373 raise errors.OpPrereqError("Can't recreate disks partially and"
6374 " change the nodes at the same time",
6376 self.instance = instance
6378 def Exec(self, feedback_fn):
6379 """Recreate the disks.
6382 instance = self.instance
6385 mods = [] # keeps track of needed logical_id changes
6387 for idx, disk in enumerate(instance.disks):
6388 if idx not in self.op.disks: # disk idx has not been passed in
6391 # update secondaries for disks, if needed
6393 if disk.dev_type == constants.LD_DRBD8:
6394 # need to update the nodes and minors
6395 assert len(self.op.nodes) == 2
6396 assert len(disk.logical_id) == 6 # otherwise disk internals
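# For DRBD8 the logical_id tuple is (node_a, node_b, port, minor_a, minor_b,
# secret): only the nodes and the minors change when recreating the disks on
# new nodes, the port and the shared secret are kept.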
6398 (_, _, old_port, _, _, old_secret) = disk.logical_id
6399 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6400 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6401 new_minors[0], new_minors[1], old_secret)
6402 assert len(disk.logical_id) == len(new_id)
6403 mods.append((idx, new_id))
6405 # now that we have passed all asserts above, we can apply the mods
6406 # in a single run (to avoid partial changes)
6407 for idx, new_id in mods:
6408 instance.disks[idx].logical_id = new_id
6410 # change primary node, if needed
6412 instance.primary_node = self.op.nodes[0]
6413 self.LogWarning("Changing the instance's nodes, you will have to"
6414 " remove any disks left on the older nodes manually")
6417 self.cfg.Update(instance, feedback_fn)
6419 _CreateDisks(self, instance, to_skip=to_skip)
6422 class LUInstanceRename(LogicalUnit):
6423 """Rename an instance.
6426 HPATH = "instance-rename"
6427 HTYPE = constants.HTYPE_INSTANCE
6429 def CheckArguments(self):
6433 if self.op.ip_check and not self.op.name_check:
6434 # TODO: make the ip check more flexible and not depend on the name check
6435 raise errors.OpPrereqError("IP address check requires a name check",
6438 def BuildHooksEnv(self):
6441 This runs on master, primary and secondary nodes of the instance.
6444 env = _BuildInstanceHookEnvByObject(self, self.instance)
6445 env["INSTANCE_NEW_NAME"] = self.op.new_name
6448 def BuildHooksNodes(self):
6449 """Build hooks nodes.
6452 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6455 def CheckPrereq(self):
6456 """Check prerequisites.
6458 This checks that the instance is in the cluster and is not running.
6461 self.op.instance_name = _ExpandInstanceName(self.cfg,
6462 self.op.instance_name)
6463 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6464 assert instance is not None
6465 _CheckNodeOnline(self, instance.primary_node)
6466 _CheckInstanceDown(self, instance, "cannot rename")
6467 self.instance = instance
6469 new_name = self.op.new_name
6470 if self.op.name_check:
6471 hostname = netutils.GetHostname(name=new_name)
6472 if hostname.name != new_name:
6473 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6475 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6476 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6477 " same as given hostname '%s'") %
6478 (hostname.name, self.op.new_name),
6480 new_name = self.op.new_name = hostname.name
6481 if (self.op.ip_check and
6482 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6483 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6484 (hostname.ip, new_name),
6485 errors.ECODE_NOTUNIQUE)
6487 instance_list = self.cfg.GetInstanceList()
6488 if new_name in instance_list and new_name != instance.name:
6489 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6490 new_name, errors.ECODE_EXISTS)
6492 def Exec(self, feedback_fn):
6493 """Rename the instance.
6496 inst = self.instance
6497 old_name = inst.name
6499 rename_file_storage = False
6500 if (inst.disk_template in constants.DTS_FILEBASED and
6501 self.op.new_name != inst.name):
6502 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6503 rename_file_storage = True
6505 self.cfg.RenameInstance(inst.name, self.op.new_name)
6506 # Change the instance lock. This is definitely safe while we hold the BGL.
6507 # Otherwise the new lock would have to be added in acquired mode.
6509 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6510 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6512 # re-read the instance from the configuration after rename
6513 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6515 if rename_file_storage:
6516 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6517 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6518 old_file_storage_dir,
6519 new_file_storage_dir)
6520 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6521 " (but the instance has been renamed in Ganeti)" %
6522 (inst.primary_node, old_file_storage_dir,
6523 new_file_storage_dir))
6525 _StartInstanceDisks(self, inst, None)
6527 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6528 old_name, self.op.debug_level)
6529 msg = result.fail_msg
6531 msg = ("Could not run OS rename script for instance %s on node %s"
6532 " (but the instance has been renamed in Ganeti): %s" %
6533 (inst.name, inst.primary_node, msg))
6534 self.proc.LogWarning(msg)
6536 _ShutdownInstanceDisks(self, inst)
6541 class LUInstanceRemove(LogicalUnit):
6542 """Remove an instance.
6545 HPATH = "instance-remove"
6546 HTYPE = constants.HTYPE_INSTANCE
6549 def ExpandNames(self):
6550 self._ExpandAndLockInstance()
6551 self.needed_locks[locking.LEVEL_NODE] = []
6552 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6554 def DeclareLocks(self, level):
6555 if level == locking.LEVEL_NODE:
6556 self._LockInstancesNodes()
6558 def BuildHooksEnv(self):
6561 This runs on master, primary and secondary nodes of the instance.
6564 env = _BuildInstanceHookEnvByObject(self, self.instance)
6565 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6568 def BuildHooksNodes(self):
6569 """Build hooks nodes.
6572 nl = [self.cfg.GetMasterNode()]
6573 nl_post = list(self.instance.all_nodes) + nl
6574 return (nl, nl_post)
6576 def CheckPrereq(self):
6577 """Check prerequisites.
6579 This checks that the instance is in the cluster.
6582 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6583 assert self.instance is not None, \
6584 "Cannot retrieve locked instance %s" % self.op.instance_name
6586 def Exec(self, feedback_fn):
6587 """Remove the instance.
6590 instance = self.instance
6591 logging.info("Shutting down instance %s on node %s",
6592 instance.name, instance.primary_node)
6594 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6595 self.op.shutdown_timeout)
6596 msg = result.fail_msg
6598 if self.op.ignore_failures:
6599 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6601 raise errors.OpExecError("Could not shutdown instance %s on"
6603 (instance.name, instance.primary_node, msg))
6605 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6608 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6609 """Utility function to remove an instance.
6612 logging.info("Removing block devices for instance %s", instance.name)
6614 if not _RemoveDisks(lu, instance):
6615 if not ignore_failures:
6616 raise errors.OpExecError("Can't remove instance's disks")
6617 feedback_fn("Warning: can't remove instance's disks")
6619 logging.info("Removing instance %s out of cluster config", instance.name)
6621 lu.cfg.RemoveInstance(instance.name)
6623 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6624 "Instance lock removal conflict"
6626 # Remove lock for the instance
6627 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6630 class LUInstanceQuery(NoHooksLU):
6631 """Logical unit for querying instances.
6634 # pylint: disable=W0142
6637 def CheckArguments(self):
6638 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6639 self.op.output_fields, self.op.use_locking)
6641 def ExpandNames(self):
6642 self.iq.ExpandNames(self)
6644 def DeclareLocks(self, level):
6645 self.iq.DeclareLocks(self, level)
6647 def Exec(self, feedback_fn):
6648 return self.iq.OldStyleQuery(self)
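# This LU is a thin wrapper: the _InstanceQuery helper built in CheckArguments
# performs the field selection, locking and data gathering, while Exec merely
# returns the data in the old-style query result format.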
6651 class LUInstanceFailover(LogicalUnit):
6652 """Failover an instance.
6655 HPATH = "instance-failover"
6656 HTYPE = constants.HTYPE_INSTANCE
6659 def CheckArguments(self):
6660 """Check the arguments.
6663 self.iallocator = getattr(self.op, "iallocator", None)
6664 self.target_node = getattr(self.op, "target_node", None)
6666 def ExpandNames(self):
6667 self._ExpandAndLockInstance()
6669 if self.op.target_node is not None:
6670 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6672 self.needed_locks[locking.LEVEL_NODE] = []
6673 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6675 ignore_consistency = self.op.ignore_consistency
6676 shutdown_timeout = self.op.shutdown_timeout
6677 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6680 ignore_consistency=ignore_consistency,
6681 shutdown_timeout=shutdown_timeout)
6682 self.tasklets = [self._migrater]
6684 def DeclareLocks(self, level):
6685 if level == locking.LEVEL_NODE:
6686 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6687 if instance.disk_template in constants.DTS_EXT_MIRROR:
6688 if self.op.target_node is None:
6689 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6691 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6692 self.op.target_node]
6693 del self.recalculate_locks[locking.LEVEL_NODE]
6695 self._LockInstancesNodes()
6697 def BuildHooksEnv(self):
6700 This runs on master, primary and secondary nodes of the instance.
6703 instance = self._migrater.instance
6704 source_node = instance.primary_node
6705 target_node = self.op.target_node
6707 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6708 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6709 "OLD_PRIMARY": source_node,
6710 "NEW_PRIMARY": target_node,
6713 if instance.disk_template in constants.DTS_INT_MIRROR:
6714 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6715 env["NEW_SECONDARY"] = source_node
6717 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6719 env.update(_BuildInstanceHookEnvByObject(self, instance))
6723 def BuildHooksNodes(self):
6724 """Build hooks nodes.
6727 instance = self._migrater.instance
6728 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6729 return (nl, nl + [instance.primary_node])
6732 class LUInstanceMigrate(LogicalUnit):
6733 """Migrate an instance.
6735 This is migration without shutting down: unlike a failover, the
6736 instance is kept running while it is moved to another node.
6739 HPATH = "instance-migrate"
6740 HTYPE = constants.HTYPE_INSTANCE
6743 def ExpandNames(self):
6744 self._ExpandAndLockInstance()
6746 if self.op.target_node is not None:
6747 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6749 self.needed_locks[locking.LEVEL_NODE] = []
6750 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6752 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6753 cleanup=self.op.cleanup,
6755 fallback=self.op.allow_failover)
6756 self.tasklets = [self._migrater]
6758 def DeclareLocks(self, level):
6759 if level == locking.LEVEL_NODE:
6760 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6761 if instance.disk_template in constants.DTS_EXT_MIRROR:
6762 if self.op.target_node is None:
6763 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6765 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6766 self.op.target_node]
6767 del self.recalculate_locks[locking.LEVEL_NODE]
6769 self._LockInstancesNodes()
6771 def BuildHooksEnv(self):
6774 This runs on master, primary and secondary nodes of the instance.
6777 instance = self._migrater.instance
6778 source_node = instance.primary_node
6779 target_node = self.op.target_node
6780 env = _BuildInstanceHookEnvByObject(self, instance)
6782 "MIGRATE_LIVE": self._migrater.live,
6783 "MIGRATE_CLEANUP": self.op.cleanup,
6784 "OLD_PRIMARY": source_node,
6785 "NEW_PRIMARY": target_node,
6788 if instance.disk_template in constants.DTS_INT_MIRROR:
6789 env["OLD_SECONDARY"] = target_node
6790 env["NEW_SECONDARY"] = source_node
6792 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6796 def BuildHooksNodes(self):
6797 """Build hooks nodes.
6800 instance = self._migrater.instance
6801 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6802 return (nl, nl + [instance.primary_node])
6805 class LUInstanceMove(LogicalUnit):
6806 """Move an instance by data-copying.
6809 HPATH = "instance-move"
6810 HTYPE = constants.HTYPE_INSTANCE
6813 def ExpandNames(self):
6814 self._ExpandAndLockInstance()
6815 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6816 self.op.target_node = target_node
6817 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6818 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6820 def DeclareLocks(self, level):
6821 if level == locking.LEVEL_NODE:
6822 self._LockInstancesNodes(primary_only=True)
6824 def BuildHooksEnv(self):
6827 This runs on master, primary and secondary nodes of the instance.
6831 "TARGET_NODE": self.op.target_node,
6832 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6834 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6837 def BuildHooksNodes(self):
6838 """Build hooks nodes.
6842 self.cfg.GetMasterNode(),
6843 self.instance.primary_node,
6844 self.op.target_node,
6848 def CheckPrereq(self):
6849 """Check prerequisites.
6851 This checks that the instance is in the cluster.
6854 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6855 assert self.instance is not None, \
6856 "Cannot retrieve locked instance %s" % self.op.instance_name
6858 node = self.cfg.GetNodeInfo(self.op.target_node)
6859 assert node is not None, \
6860 "Cannot retrieve locked node %s" % self.op.target_node
6862 self.target_node = target_node = node.name
6864 if target_node == instance.primary_node:
6865 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6866 (instance.name, target_node),
6869 bep = self.cfg.GetClusterInfo().FillBE(instance)
6871 for idx, dsk in enumerate(instance.disks):
6872 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6873 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6874 " cannot copy" % idx, errors.ECODE_STATE)
6876 _CheckNodeOnline(self, target_node)
6877 _CheckNodeNotDrained(self, target_node)
6878 _CheckNodeVmCapable(self, target_node)
6880 if instance.admin_up:
6881 # check memory requirements on the secondary node
6882 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6883 instance.name, bep[constants.BE_MEMORY],
6884 instance.hypervisor)
6886 self.LogInfo("Not checking memory on the secondary node as"
6887 " instance will not be started")
6889 # check bridge existence
6890 _CheckInstanceBridgesExist(self, instance, node=target_node)
6892 def Exec(self, feedback_fn):
6893 """Move an instance.
6895 The move is done by shutting it down on its present node, copying
6896 the data over (slow) and starting it on the new node.
6899 instance = self.instance
6901 source_node = instance.primary_node
6902 target_node = self.target_node
6904 self.LogInfo("Shutting down instance %s on source node %s",
6905 instance.name, source_node)
6907 result = self.rpc.call_instance_shutdown(source_node, instance,
6908 self.op.shutdown_timeout)
6909 msg = result.fail_msg
6911 if self.op.ignore_consistency:
6912 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6913 " Proceeding anyway. Please make sure node"
6914 " %s is down. Error details: %s",
6915 instance.name, source_node, source_node, msg)
6917 raise errors.OpExecError("Could not shutdown instance %s on"
6919 (instance.name, source_node, msg))
6921 # create the target disks
6923 _CreateDisks(self, instance, target_node=target_node)
6924 except errors.OpExecError:
6925 self.LogWarning("Device creation failed, reverting...")
6927 _RemoveDisks(self, instance, target_node=target_node)
6929 self.cfg.ReleaseDRBDMinors(instance.name)
6932 cluster_name = self.cfg.GetClusterInfo().cluster_name
6935 # activate, get path, copy the data over
6936 for idx, disk in enumerate(instance.disks):
6937 self.LogInfo("Copying data for disk %d", idx)
6938 result = self.rpc.call_blockdev_assemble(target_node, disk,
6939 instance.name, True, idx)
6941 self.LogWarning("Can't assemble newly created disk %d: %s",
6942 idx, result.fail_msg)
6943 errs.append(result.fail_msg)
6945 dev_path = result.payload
6946 result = self.rpc.call_blockdev_export(source_node, disk,
6947 target_node, dev_path,
6950 self.LogWarning("Can't copy data over for disk %d: %s",
6951 idx, result.fail_msg)
6952 errs.append(result.fail_msg)
6956 self.LogWarning("Some disks failed to copy, aborting")
6958 _RemoveDisks(self, instance, target_node=target_node)
6960 self.cfg.ReleaseDRBDMinors(instance.name)
6961 raise errors.OpExecError("Errors during disk copy: %s" %
6964 instance.primary_node = target_node
6965 self.cfg.Update(instance, feedback_fn)
6967 self.LogInfo("Removing the disks on the original node")
6968 _RemoveDisks(self, instance, target_node=source_node)
6970 # Only start the instance if it's marked as up
6971 if instance.admin_up:
6972 self.LogInfo("Starting instance %s on node %s",
6973 instance.name, target_node)
6975 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6976 ignore_secondaries=True)
6978 _ShutdownInstanceDisks(self, instance)
6979 raise errors.OpExecError("Can't activate the instance's disks")
6981 result = self.rpc.call_instance_start(target_node, instance,
6983 msg = result.fail_msg
6985 _ShutdownInstanceDisks(self, instance)
6986 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6987 (instance.name, target_node, msg))
6990 class LUNodeMigrate(LogicalUnit):
6991 """Migrate all instances from a node.
6994 HPATH = "node-migrate"
6995 HTYPE = constants.HTYPE_NODE
6998 def CheckArguments(self):
7001 def ExpandNames(self):
7002 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7004 self.share_locks = _ShareAll()
7005 self.needed_locks = {
7006 locking.LEVEL_NODE: [self.op.node_name],
7009 def BuildHooksEnv(self):
7012 This runs on the master, the primary and all the secondaries.
7016 "NODE_NAME": self.op.node_name,
7019 def BuildHooksNodes(self):
7020 """Build hooks nodes.
7023 nl = [self.cfg.GetMasterNode()]
7026 def CheckPrereq(self):
7029 def Exec(self, feedback_fn):
7030 # Prepare jobs for migration instances
7032 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7035 iallocator=self.op.iallocator,
7036 target_node=self.op.target_node)]
7037 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7040 # TODO: Run iallocator in this opcode and pass correct placement options to
7041 # OpInstanceMigrate. Since other jobs can modify the cluster between
7042 # running the iallocator and the actual migration, a good consistency model
7043 # will have to be found.
7045 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7046 frozenset([self.op.node_name]))
7048 return ResultWithJobs(jobs)
7051 class TLMigrateInstance(Tasklet):
7052 """Tasklet class for instance migration.
7055 @ivar live: whether the migration will be done live or non-live;
7056 this variable is initialized only after CheckPrereq has run
7057 @type cleanup: boolean
7058 @ivar cleanup: Whether we clean up from a failed migration
7059 @type iallocator: string
7060 @ivar iallocator: The iallocator used to determine target_node
7061 @type target_node: string
7062 @ivar target_node: If given, the target_node to reallocate the instance to
7063 @type failover: boolean
7064 @ivar failover: Whether operation results in failover or migration
7065 @type fallback: boolean
7066 @ivar fallback: Whether fallback to failover is allowed if migration not
7068 @type ignore_consistency: boolean
7069 @ivar ignore_consistency: Whether we should ignore consistency between source
7071 @type shutdown_timeout: int
7072 @ivar shutdown_timeout: In case of failover, the timeout of the shutdown
7075 def __init__(self, lu, instance_name, cleanup=False,
7076 failover=False, fallback=False,
7077 ignore_consistency=False,
7078 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7079 """Initializes this class.
7082 Tasklet.__init__(self, lu)
7085 self.instance_name = instance_name
7086 self.cleanup = cleanup
7087 self.live = False # will be overridden later
7088 self.failover = failover
7089 self.fallback = fallback
7090 self.ignore_consistency = ignore_consistency
7091 self.shutdown_timeout = shutdown_timeout
7093 def CheckPrereq(self):
7094 """Check prerequisites.
7096 This checks that the instance is in the cluster.
7099 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7100 instance = self.cfg.GetInstanceInfo(instance_name)
7101 assert instance is not None
7102 self.instance = instance
7104 if (not self.cleanup and not instance.admin_up and not self.failover and
7106 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7108 self.failover = True
7110 if instance.disk_template not in constants.DTS_MIRRORED:
7115 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7116 " %s" % (instance.disk_template, text),
7119 if instance.disk_template in constants.DTS_EXT_MIRROR:
7120 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7122 if self.lu.op.iallocator:
7123 self._RunAllocator()
7125 # We set self.target_node as it is required by
7127 self.target_node = self.lu.op.target_node
7129 # self.target_node is already populated, either directly or by the
7131 target_node = self.target_node
7132 if self.target_node == instance.primary_node:
7133 raise errors.OpPrereqError("Cannot migrate instance %s"
7134 " to its primary (%s)" %
7135 (instance.name, instance.primary_node))
7137 if len(self.lu.tasklets) == 1:
7138 # It is safe to release locks only when we're the only tasklet
7140 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7141 keep=[instance.primary_node, self.target_node])
7144 secondary_nodes = instance.secondary_nodes
7145 if not secondary_nodes:
7146 raise errors.ConfigurationError("No secondary node but using"
7147 " %s disk template" %
7148 instance.disk_template)
7149 target_node = secondary_nodes[0]
7150 if self.lu.op.iallocator or (self.lu.op.target_node and
7151 self.lu.op.target_node != target_node):
7153 text = "failed over"
7156 raise errors.OpPrereqError("Instances with disk template %s cannot"
7157 " be %s to arbitrary nodes"
7158 " (neither an iallocator nor a target"
7159 " node can be passed)" %
7160 (instance.disk_template, text),
7163 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7165 # check memory requirements on the secondary node
7166 if not self.failover or instance.admin_up:
7167 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7168 instance.name, i_be[constants.BE_MEMORY],
7169 instance.hypervisor)
7171 self.lu.LogInfo("Not checking memory on the secondary node as"
7172 " instance will not be started")
7174 # check bridge existence
7175 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7177 if not self.cleanup:
7178 _CheckNodeNotDrained(self.lu, target_node)
7179 if not self.failover:
7180 result = self.rpc.call_instance_migratable(instance.primary_node,
7182 if result.fail_msg and self.fallback:
7183 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7185 self.failover = True
7187 result.Raise("Can't migrate, please use failover",
7188 prereq=True, ecode=errors.ECODE_STATE)
7190 assert not (self.failover and self.cleanup)
7192 if not self.failover:
7193 if self.lu.op.live is not None and self.lu.op.mode is not None:
7194 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7195 " parameters are accepted",
7197 if self.lu.op.live is not None:
7199 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7201 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7202 # reset the 'live' parameter to None so that repeated
7203 # invocations of CheckPrereq do not raise an exception
7204 self.lu.op.live = None
7205 elif self.lu.op.mode is None:
7206 # read the default value from the hypervisor
7207 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7209 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7211 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7213 # Failover is never live
7216 def _RunAllocator(self):
7217 """Run the allocator based on input opcode.
7220 ial = IAllocator(self.cfg, self.rpc,
7221 mode=constants.IALLOCATOR_MODE_RELOC,
7222 name=self.instance_name,
7223 # TODO See why hail breaks with a single node below
7224 relocate_from=[self.instance.primary_node,
7225 self.instance.primary_node],
7228 ial.Run(self.lu.op.iallocator)
7231 raise errors.OpPrereqError("Can't compute nodes using"
7232 " iallocator '%s': %s" %
7233 (self.lu.op.iallocator, ial.info),
7235 if len(ial.result) != ial.required_nodes:
7236 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7237 " of nodes (%s), required %s" %
7238 (self.lu.op.iallocator, len(ial.result),
7239 ial.required_nodes), errors.ECODE_FAULT)
7240 self.target_node = ial.result[0]
7241 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7242 self.instance_name, self.lu.op.iallocator,
7243 utils.CommaJoin(ial.result))
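# Note: the iallocator must return exactly the number of nodes it reports as
# required; the first node of the result becomes the migration/failover
# target whenever the opcode specified an iallocator instead of an explicit
# target_node.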
7245 def _WaitUntilSync(self):
7246 """Poll with custom rpc for disk sync.
7248 This uses our own step-based rpc call.
7251 self.feedback_fn("* wait until resync is done")
7255 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7257 self.instance.disks)
7259 for node, nres in result.items():
7260 nres.Raise("Cannot resync disks on node %s" % node)
7261 node_done, node_percent = nres.payload
7262 all_done = all_done and node_done
7263 if node_percent is not None:
7264 min_percent = min(min_percent, node_percent)
7266 if min_percent < 100:
7267 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7270 def _EnsureSecondary(self, node):
7271 """Demote a node to secondary.
7274 self.feedback_fn("* switching node %s to secondary mode" % node)
7276 for dev in self.instance.disks:
7277 self.cfg.SetDiskID(dev, node)
7279 result = self.rpc.call_blockdev_close(node, self.instance.name,
7280 self.instance.disks)
7281 result.Raise("Cannot change disk to secondary on node %s" % node)
7283 def _GoStandalone(self):
7284 """Disconnect from the network.
7287 self.feedback_fn("* changing into standalone mode")
7288 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7289 self.instance.disks)
7290 for node, nres in result.items():
7291 nres.Raise("Cannot disconnect disks node %s" % node)
7293 def _GoReconnect(self, multimaster):
7294 """Reconnect to the network.
7300 msg = "single-master"
7301 self.feedback_fn("* changing disks into %s mode" % msg)
7302 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7303 self.instance.disks,
7304 self.instance.name, multimaster)
7305 for node, nres in result.items():
7306 nres.Raise("Cannot change disks config on node %s" % node)
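# _GoReconnect(True) puts the DRBD disks into dual-master mode just before a
# live migration, while _GoReconnect(False) returns them to single-master
# mode afterwards; the cleanup and revert paths below only ever use the
# single-master variant.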
7308 def _ExecCleanup(self):
7309 """Try to cleanup after a failed migration.
7311 The cleanup is done by:
7312 - check that the instance is running only on one node
7313 (and update the config if needed)
7314 - change disks on its secondary node to secondary
7315 - wait until disks are fully synchronized
7316 - disconnect from the network
7317 - change disks into single-master mode
7318 - wait again until disks are fully synchronized
7321 instance = self.instance
7322 target_node = self.target_node
7323 source_node = self.source_node
7325 # check running on only one node
7326 self.feedback_fn("* checking where the instance actually runs"
7327 " (if this hangs, the hypervisor might be in"
7329 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7330 for node, result in ins_l.items():
7331 result.Raise("Can't contact node %s" % node)
7333 runningon_source = instance.name in ins_l[source_node].payload
7334 runningon_target = instance.name in ins_l[target_node].payload
7336 if runningon_source and runningon_target:
7337 raise errors.OpExecError("Instance seems to be running on two nodes,"
7338 " or the hypervisor is confused; you will have"
7339 " to ensure manually that it runs only on one"
7340 " and restart this operation")
7342 if not (runningon_source or runningon_target):
7343 raise errors.OpExecError("Instance does not seem to be running at all;"
7344 " in this case it's safer to repair by"
7345 " running 'gnt-instance stop' to ensure disk"
7346 " shutdown, and then restarting it")
7348 if runningon_target:
7349 # the migration has actually succeeded, we need to update the config
7350 self.feedback_fn("* instance running on secondary node (%s),"
7351 " updating config" % target_node)
7352 instance.primary_node = target_node
7353 self.cfg.Update(instance, self.feedback_fn)
7354 demoted_node = source_node
7356 self.feedback_fn("* instance confirmed to be running on its"
7357 " primary node (%s)" % source_node)
7358 demoted_node = target_node
7360 if instance.disk_template in constants.DTS_INT_MIRROR:
7361 self._EnsureSecondary(demoted_node)
7363 self._WaitUntilSync()
7364 except errors.OpExecError:
7365 # we ignore here errors, since if the device is standalone, it
7366 # won't be able to sync
7368 self._GoStandalone()
7369 self._GoReconnect(False)
7370 self._WaitUntilSync()
7372 self.feedback_fn("* done")
7374 def _RevertDiskStatus(self):
7375 """Try to revert the disk status after a failed migration.
7378 target_node = self.target_node
7379 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7383 self._EnsureSecondary(target_node)
7384 self._GoStandalone()
7385 self._GoReconnect(False)
7386 self._WaitUntilSync()
7387 except errors.OpExecError, err:
7388 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7389 " please try to recover the instance manually;"
7390 " error '%s'" % str(err))
7392 def _AbortMigration(self):
7393 """Call the hypervisor code to abort a started migration.
7396 instance = self.instance
7397 target_node = self.target_node
7398 migration_info = self.migration_info
7400 abort_result = self.rpc.call_finalize_migration(target_node,
7404 abort_msg = abort_result.fail_msg
7406 logging.error("Aborting migration failed on target node %s: %s",
7407 target_node, abort_msg)
7408 # Don't raise an exception here, as we still have to try to revert the
7409 # disk status, even if this step failed.
7411 def _ExecMigration(self):
7412 """Migrate an instance.
7414 The migrate is done by:
7415 - change the disks into dual-master mode
7416 - wait until disks are fully synchronized again
7417 - migrate the instance
7418 - change disks on the new secondary node (the old primary) to secondary
7419 - wait until disks are fully synchronized
7420 - change disks into single-master mode
7423 instance = self.instance
7424 target_node = self.target_node
7425 source_node = self.source_node
7427 # Check for hypervisor version mismatch and warn the user.
7428 nodeinfo = self.rpc.call_node_info([source_node, target_node],
7429 None, self.instance.hypervisor)
7430 src_info = nodeinfo[source_node]
7431 dst_info = nodeinfo[target_node]
7433 if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7434 (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7435 src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7436 dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7437 if src_version != dst_version:
7438 self.feedback_fn("* warning: hypervisor version mismatch between"
7439 " source (%s) and target (%s) node" %
7440 (src_version, dst_version))
7442 self.feedback_fn("* checking disk consistency between source and target")
7443 for dev in instance.disks:
7444 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7445 raise errors.OpExecError("Disk %s is degraded or not fully"
7446 " synchronized on target node,"
7447 " aborting migration" % dev.iv_name)
7449 # First get the migration information from the remote node
7450 result = self.rpc.call_migration_info(source_node, instance)
7451 msg = result.fail_msg
7453 log_err = ("Failed fetching source migration information from %s: %s" %
7455 logging.error(log_err)
7456 raise errors.OpExecError(log_err)
7458 self.migration_info = migration_info = result.payload
7460 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7461 # Then switch the disks to master/master mode
7462 self._EnsureSecondary(target_node)
7463 self._GoStandalone()
7464 self._GoReconnect(True)
7465 self._WaitUntilSync()
7467 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7468 result = self.rpc.call_accept_instance(target_node,
7471 self.nodes_ip[target_node])
7473 msg = result.fail_msg
7475 logging.error("Instance pre-migration failed, trying to revert"
7476 " disk status: %s", msg)
7477 self.feedback_fn("Pre-migration failed, aborting")
7478 self._AbortMigration()
7479 self._RevertDiskStatus()
7480 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7481 (instance.name, msg))
7483 self.feedback_fn("* migrating instance to %s" % target_node)
7484 result = self.rpc.call_instance_migrate(source_node, instance,
7485 self.nodes_ip[target_node],
7487 msg = result.fail_msg
7489 logging.error("Instance migration failed, trying to revert"
7490 " disk status: %s", msg)
7491 self.feedback_fn("Migration failed, aborting")
7492 self._AbortMigration()
7493 self._RevertDiskStatus()
7494 raise errors.OpExecError("Could not migrate instance %s: %s" %
7495 (instance.name, msg))
7497 instance.primary_node = target_node
7498 # distribute new instance config to the other nodes
7499 self.cfg.Update(instance, self.feedback_fn)
7501 result = self.rpc.call_finalize_migration(target_node,
7505 msg = result.fail_msg
7507 logging.error("Instance migration succeeded, but finalization failed:"
7509 raise errors.OpExecError("Could not finalize instance migration: %s" %
7512 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7513 self._EnsureSecondary(source_node)
7514 self._WaitUntilSync()
7515 self._GoStandalone()
7516 self._GoReconnect(False)
7517 self._WaitUntilSync()
7519 self.feedback_fn("* done")
7521 def _ExecFailover(self):
7522 """Failover an instance.
7524 The failover is done by shutting it down on its present node and
7525 starting it on the secondary.
7528 instance = self.instance
7529 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7531 source_node = instance.primary_node
7532 target_node = self.target_node
7534 if instance.admin_up:
7535 self.feedback_fn("* checking disk consistency between source and target")
7536 for dev in instance.disks:
7537 # for drbd, these are drbd over lvm
7538 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7539 if primary_node.offline:
7540 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7542 (primary_node.name, dev.iv_name, target_node))
7543 elif not self.ignore_consistency:
7544 raise errors.OpExecError("Disk %s is degraded on target node,"
7545 " aborting failover" % dev.iv_name)
7547 self.feedback_fn("* not checking disk consistency as instance is not"
7550 self.feedback_fn("* shutting down instance on source node")
7551 logging.info("Shutting down instance %s on node %s",
7552 instance.name, source_node)
7554 result = self.rpc.call_instance_shutdown(source_node, instance,
7555 self.shutdown_timeout)
7556 msg = result.fail_msg
7558 if self.ignore_consistency or primary_node.offline:
7559 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7560 " proceeding anyway; please make sure node"
7561 " %s is down; error details: %s",
7562 instance.name, source_node, source_node, msg)
7564 raise errors.OpExecError("Could not shutdown instance %s on"
7566 (instance.name, source_node, msg))
7568 self.feedback_fn("* deactivating the instance's disks on source node")
7569 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7570 raise errors.OpExecError("Can't shut down the instance's disks")
7572 instance.primary_node = target_node
7573 # distribute new instance config to the other nodes
7574 self.cfg.Update(instance, self.feedback_fn)
7576 # Only start the instance if it's marked as up
7577 if instance.admin_up:
7578 self.feedback_fn("* activating the instance's disks on target node %s" %
7580 logging.info("Starting instance %s on node %s",
7581 instance.name, target_node)
7583 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7584 ignore_secondaries=True)
7586 _ShutdownInstanceDisks(self.lu, instance)
7587 raise errors.OpExecError("Can't activate the instance's disks")
7589 self.feedback_fn("* starting the instance on the target node %s" %
7591 result = self.rpc.call_instance_start(target_node, instance, None, None,
7593 msg = result.fail_msg
7595 _ShutdownInstanceDisks(self.lu, instance)
7596 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7597 (instance.name, target_node, msg))
7599 def Exec(self, feedback_fn):
7600 """Perform the migration.
7603 self.feedback_fn = feedback_fn
7604 self.source_node = self.instance.primary_node
7606 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7607 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7608 self.target_node = self.instance.secondary_nodes[0]
7609 # Otherwise self.target_node has been populated either
7610 # directly, or through an iallocator.
7612 self.all_nodes = [self.source_node, self.target_node]
7613 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7614 in self.cfg.GetMultiNodeInfo(self.all_nodes))
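# Illustrative note: nodes_ip maps each involved node name to its secondary
# (replication) IP address, e.g. (made-up names and addresses):
#   {"node1.example.com": "192.0.2.11", "node2.example.com": "192.0.2.12"}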
7617 feedback_fn("Failover instance %s" % self.instance.name)
7618 self._ExecFailover()
7620 feedback_fn("Migrating instance %s" % self.instance.name)
7623 return self._ExecCleanup()
7625 return self._ExecMigration()
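# In short (sketch of the dispatch above): a failover request is handled by
# _ExecFailover, a cleanup request by _ExecCleanup, and a normal live
# migration by _ExecMigration, all using the node/IP data prepared just above.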
7628 def _CreateBlockDev(lu, node, instance, device, force_create,
7630 """Create a tree of block devices on a given node.
7632 If this device type has to be created on secondaries, create it and all its children.
7635 If not, just recurse to children keeping the same 'force' value.
7637 @param lu: the lu on whose behalf we execute
7638 @param node: the node on which to create the device
7639 @type instance: L{objects.Instance}
7640 @param instance: the instance which owns the device
7641 @type device: L{objects.Disk}
7642 @param device: the device to create
7643 @type force_create: boolean
7644 @param force_create: whether to force creation of this device; this
7645 will be changed to True whenever we find a device for which
7646 CreateOnSecondary() returns True
7647 @param info: the extra 'metadata' we should attach to the device
7648 (this will be represented as a LVM tag)
7649 @type force_open: boolean
7650 @param force_open: this parameter will be passed to the
7651 L{backend.BlockdevCreate} function where it specifies
7652 whether we run on the primary node or not, and it affects both
7653 the child assembly and the device's own Open() execution
7656 if device.CreateOnSecondary():
7660 for child in device.children:
7661 _CreateBlockDev(lu, node, instance, child, force_create,
7664 if not force_create:
7667 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
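# Rough illustration of the recursion above (not an exact trace): when
# device.CreateOnSecondary() is true, force_create is switched on and, via the
# loop over device.children, propagated to all children, so e.g. a DRBD8
# device gets its data/metadata LVs created before the DRBD device itself;
# when force_create remains False the device itself is skipped after its
# children have been visited.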
7670 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7671 """Create a single block device on a given node.
7673 This will not recurse over children of the device, so they must be created in advance.
7676 @param lu: the lu on whose behalf we execute
7677 @param node: the node on which to create the device
7678 @type instance: L{objects.Instance}
7679 @param instance: the instance which owns the device
7680 @type device: L{objects.Disk}
7681 @param device: the device to create
7682 @param info: the extra 'metadata' we should attach to the device
7683 (this will be represented as a LVM tag)
7684 @type force_open: boolean
7685 @param force_open: this parameter will be passed to the
7686 L{backend.BlockdevCreate} function where it specifies
7687 whether we run on the primary node or not, and it affects both
7688 the child assembly and the device's own Open() execution
7691 lu.cfg.SetDiskID(device, node)
7692 result = lu.rpc.call_blockdev_create(node, device, device.size,
7693 instance.name, force_open, info)
7694 result.Raise("Can't create block device %s on"
7695 " node %s for instance %s" % (device, node, instance.name))
7696 if device.physical_id is None:
7697 device.physical_id = result.payload
7700 def _GenerateUniqueNames(lu, exts):
7701 """Generate suitable LV names.
7703 This will generate a unique logical volume name for each requested extension.
7708 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7709 results.append("%s%s" % (new_id, val))
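# Example of the naming scheme above (schematic UUIDs):
#   _GenerateUniqueNames(lu, [".disk0", ".disk1"])
#   => ["<uuid-1>.disk0", "<uuid-2>.disk1"]
# where each <uuid-N> stands for a freshly generated unique ID.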
7713 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7714 iv_name, p_minor, s_minor):
7715 """Generate a drbd8 device complete with its children.
7718 assert len(vgnames) == len(names) == 2
7719 port = lu.cfg.AllocatePort()
7720 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7721 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7722 logical_id=(vgnames[0], names[0]))
7723 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7724 logical_id=(vgnames[1], names[1]))
7725 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7726 logical_id=(primary, secondary, port,
7729 children=[dev_data, dev_meta],
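# Resulting device tree (sketch): a DRBD8 Disk whose logical_id holds the two
# nodes, the allocated port, the two minors and the shared secret, with two
# children: the data LV of the requested size and a 128 MB metadata LV, both
# placed in the volume groups given by vgnames.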
7734 def _GenerateDiskTemplate(lu, template_name,
7735 instance_name, primary_node,
7736 secondary_nodes, disk_info,
7737 file_storage_dir, file_driver,
7738 base_index, feedback_fn):
7739 """Generate the entire disk layout for a given template type.
7742 #TODO: compute space requirements
7744 vgname = lu.cfg.GetVGName()
7745 disk_count = len(disk_info)
7747 if template_name == constants.DT_DISKLESS:
7749 elif template_name == constants.DT_PLAIN:
7750 if len(secondary_nodes) != 0:
7751 raise errors.ProgrammerError("Wrong template configuration")
7753 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7754 for i in range(disk_count)])
7755 for idx, disk in enumerate(disk_info):
7756 disk_index = idx + base_index
7757 vg = disk.get(constants.IDISK_VG, vgname)
7758 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7759 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7760 size=disk[constants.IDISK_SIZE],
7761 logical_id=(vg, names[idx]),
7762 iv_name="disk/%d" % disk_index,
7763 mode=disk[constants.IDISK_MODE])
7764 disks.append(disk_dev)
7765 elif template_name == constants.DT_DRBD8:
7766 if len(secondary_nodes) != 1:
7767 raise errors.ProgrammerError("Wrong template configuration")
7768 remote_node = secondary_nodes[0]
7769 minors = lu.cfg.AllocateDRBDMinor(
7770 [primary_node, remote_node] * len(disk_info), instance_name)
7773 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7774 for i in range(disk_count)]):
7775 names.append(lv_prefix + "_data")
7776 names.append(lv_prefix + "_meta")
7777 for idx, disk in enumerate(disk_info):
7778 disk_index = idx + base_index
7779 data_vg = disk.get(constants.IDISK_VG, vgname)
7780 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7781 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7782 disk[constants.IDISK_SIZE],
7784 names[idx * 2:idx * 2 + 2],
7785 "disk/%d" % disk_index,
7786 minors[idx * 2], minors[idx * 2 + 1])
7787 disk_dev.mode = disk[constants.IDISK_MODE]
7788 disks.append(disk_dev)
7789 elif template_name == constants.DT_FILE:
7790 if len(secondary_nodes) != 0:
7791 raise errors.ProgrammerError("Wrong template configuration")
7793 opcodes.RequireFileStorage()
7795 for idx, disk in enumerate(disk_info):
7796 disk_index = idx + base_index
7797 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7798 size=disk[constants.IDISK_SIZE],
7799 iv_name="disk/%d" % disk_index,
7800 logical_id=(file_driver,
7801 "%s/disk%d" % (file_storage_dir,
7803 mode=disk[constants.IDISK_MODE])
7804 disks.append(disk_dev)
7805 elif template_name == constants.DT_SHARED_FILE:
7806 if len(secondary_nodes) != 0:
7807 raise errors.ProgrammerError("Wrong template configuration")
7809 opcodes.RequireSharedFileStorage()
7811 for idx, disk in enumerate(disk_info):
7812 disk_index = idx + base_index
7813 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7814 size=disk[constants.IDISK_SIZE],
7815 iv_name="disk/%d" % disk_index,
7816 logical_id=(file_driver,
7817 "%s/disk%d" % (file_storage_dir,
7819 mode=disk[constants.IDISK_MODE])
7820 disks.append(disk_dev)
7821 elif template_name == constants.DT_BLOCK:
7822 if len(secondary_nodes) != 0:
7823 raise errors.ProgrammerError("Wrong template configuration")
7825 for idx, disk in enumerate(disk_info):
7826 disk_index = idx + base_index
7827 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7828 size=disk[constants.IDISK_SIZE],
7829 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7830 disk[constants.IDISK_ADOPT]),
7831 iv_name="disk/%d" % disk_index,
7832 mode=disk[constants.IDISK_MODE])
7833 disks.append(disk_dev)
7836 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
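# Example outcome (sketch, hypothetical sizes): DT_PLAIN with one 1024 MB disk
# yields a single LV-backed Disk with iv_name "disk/0"; DT_DRBD8 yields one
# DRBD8 tree (as sketched above) per disk; the file-based templates yield
# Disks pointing at files under file_storage_dir, and DT_BLOCK Disks wrapping
# the manually provided block devices.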
7840 def _GetInstanceInfoText(instance):
7841 """Compute the text that should be added to the disk's metadata.
7844 return "originstname+%s" % instance.name
7847 def _CalcEta(time_taken, written, total_size):
7848 """Calculates the ETA based on size written and total size.
7850 @param time_taken: The time taken so far
7851 @param written: amount written so far
7852 @param total_size: The total size of data to be written
7853 @return: The remaining time in seconds
7856 avg_time = time_taken / float(written)
7857 return (total_size - written) * avg_time
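# Worked example (made-up numbers, any consistent unit):
#   _CalcEta(120.0, 1024, 4096) == (4096 - 1024) * (120.0 / 1024) == 360.0
# i.e. after writing a quarter of the data in two minutes, six more minutes
# are estimated to remain.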
7860 def _WipeDisks(lu, instance):
7861 """Wipes instance disks.
7863 @type lu: L{LogicalUnit}
7864 @param lu: the logical unit on whose behalf we execute
7865 @type instance: L{objects.Instance}
7866 @param instance: the instance whose disks we should create
7867 @return: the success of the wipe
7870 node = instance.primary_node
7872 for device in instance.disks:
7873 lu.cfg.SetDiskID(device, node)
7875 logging.info("Pause sync of instance %s disks", instance.name)
7876 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7878 for idx, success in enumerate(result.payload):
7880 logging.warn("pause-sync of instance %s for disks %d failed",
7884 for idx, device in enumerate(instance.disks):
7885 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk size,
7886 # but at most MAX_WIPE_CHUNK
7887 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7888 constants.MIN_WIPE_CHUNK_PERCENT)
7889 # we _must_ make this an int, otherwise rounding errors will occur
7891 wipe_chunk_size = int(wipe_chunk_size)
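# Worked example (assuming MIN_WIPE_CHUNK_PERCENT == 10 and
# MAX_WIPE_CHUNK == 1024; the real constants may differ): a 20480 MB disk
# gives min(1024, 20480 / 100.0 * 10) = 1024 MB chunks, while a 5120 MB disk
# gives min(1024, 512) = 512 MB chunks.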
7893 lu.LogInfo("* Wiping disk %d", idx)
7894 logging.info("Wiping disk %d for instance %s, node %s using"
7895 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7900 start_time = time.time()
7902 while offset < size:
7903 wipe_size = min(wipe_chunk_size, size - offset)
7904 logging.debug("Wiping disk %d, offset %s, chunk %s",
7905 idx, offset, wipe_size)
7906 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7907 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7908 (idx, offset, wipe_size))
7911 if now - last_output >= 60:
7912 eta = _CalcEta(now - start_time, offset, size)
7913 lu.LogInfo(" - done: %.1f%% ETA: %s" %
7914 (offset / float(size) * 100, utils.FormatSeconds(eta)))
7917 logging.info("Resume sync of instance %s disks", instance.name)
7919 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7921 for idx, success in enumerate(result.payload):
7923 lu.LogWarning("Resume sync of disk %d failed, please have a"
7924 " look at the status and troubleshoot the issue", idx)
7925 logging.warn("resume-sync of instance %s for disks %d failed",
7929 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7930 """Create all disks for an instance.
7932 This abstracts away some work from AddInstance.
7934 @type lu: L{LogicalUnit}
7935 @param lu: the logical unit on whose behalf we execute
7936 @type instance: L{objects.Instance}
7937 @param instance: the instance whose disks we should create
7939 @param to_skip: list of indices to skip
7940 @type target_node: string
7941 @param target_node: if passed, overrides the target node for creation
7943 @return: the success of the creation
7946 info = _GetInstanceInfoText(instance)
7947 if target_node is None:
7948 pnode = instance.primary_node
7949 all_nodes = instance.all_nodes
7954 if instance.disk_template in constants.DTS_FILEBASED:
7955 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7956 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7958 result.Raise("Failed to create directory '%s' on"
7959 " node %s" % (file_storage_dir, pnode))
7961 # Note: this needs to be kept in sync with adding of disks in
7962 # LUInstanceSetParams
7963 for idx, device in enumerate(instance.disks):
7964 if to_skip and idx in to_skip:
7966 logging.info("Creating volume %s for instance %s",
7967 device.iv_name, instance.name)
7969 for node in all_nodes:
7970 f_create = node == pnode
7971 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7974 def _RemoveDisks(lu, instance, target_node=None):
7975 """Remove all disks for an instance.
7977 This abstracts away some work from `AddInstance()` and
7978 `RemoveInstance()`. Note that in case some of the devices couldn't
7979 be removed, the removal will continue with the other ones (compare
7980 with `_CreateDisks()`).
7982 @type lu: L{LogicalUnit}
7983 @param lu: the logical unit on whose behalf we execute
7984 @type instance: L{objects.Instance}
7985 @param instance: the instance whose disks we should remove
7986 @type target_node: string
7987 @param target_node: used to override the node on which to remove the disks
7989 @return: the success of the removal
7992 logging.info("Removing block devices for instance %s", instance.name)
7995 for device in instance.disks:
7997 edata = [(target_node, device)]
7999 edata = device.ComputeNodeTree(instance.primary_node)
8000 for node, disk in edata:
8001 lu.cfg.SetDiskID(disk, node)
8002 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8004 lu.LogWarning("Could not remove block device %s on node %s,"
8005 " continuing anyway: %s", device.iv_name, node, msg)
8008 # if this is a DRBD disk, return its port to the pool
8009 if device.dev_type in constants.LDS_DRBD:
8010 tcp_port = device.logical_id[2]
8011 lu.cfg.AddTcpUdpPort(tcp_port)
8013 if instance.disk_template == constants.DT_FILE:
8014 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8018 tgt = instance.primary_node
8019 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8021 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8022 file_storage_dir, instance.primary_node, result.fail_msg)
8028 def _ComputeDiskSizePerVG(disk_template, disks):
8029 """Compute disk size requirements in the volume group
8032 def _compute(disks, payload):
8033 """Universal algorithm.
8038 vgs[disk[constants.IDISK_VG]] = \
8039 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8043 # Required free disk space as a function of disk and swap space
8045 constants.DT_DISKLESS: {},
8046 constants.DT_PLAIN: _compute(disks, 0),
8047 # 128 MB are added for drbd metadata for each disk
8048 constants.DT_DRBD8: _compute(disks, 128),
8049 constants.DT_FILE: {},
8050 constants.DT_SHARED_FILE: {},
8053 if disk_template not in req_size_dict:
8054 raise errors.ProgrammerError("Disk template '%s' size requirement"
8055 " is unknown" % disk_template)
8057 return req_size_dict[disk_template]
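# Worked example (hypothetical input, keys shown schematically for
# constants.IDISK_VG / constants.IDISK_SIZE): for constants.DT_DRBD8 and
# disks = [{vg: "xenvg", size: 1024}, {vg: "xenvg", size: 2048}] the result is
# {"xenvg": (1024 + 128) + (2048 + 128)} == {"xenvg": 3328}, i.e. each disk
# contributes its size plus 128 MB of DRBD metadata to its volume group.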
8060 def _ComputeDiskSize(disk_template, disks):
8061 """Compute the total disk size requirement for the given disk template.
8064 # Required free disk space as a function of disk and swap space
8066 constants.DT_DISKLESS: None,
8067 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8068 # 128 MB are added for drbd metadata for each disk
8069 constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
8070 constants.DT_FILE: None,
8071 constants.DT_SHARED_FILE: 0,
8072 constants.DT_BLOCK: 0,
8075 if disk_template not in req_size_dict:
8076 raise errors.ProgrammerError("Disk template '%s' size requirement"
8077 " is unknown" % disk_template)
8079 return req_size_dict[disk_template]
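# Worked example (hypothetical sizes): two disks of 1024 MB and 2048 MB need
# 3072 MB for constants.DT_PLAIN and 3072 + 2 * 128 = 3328 MB for
# constants.DT_DRBD8, while the file-based, block and diskless templates need
# no volume group space at all.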
8082 def _FilterVmNodes(lu, nodenames):
8083 """Filters out non-vm_capable nodes from a list.
8085 @type lu: L{LogicalUnit}
8086 @param lu: the logical unit for which we check
8087 @type nodenames: list
8088 @param nodenames: the list of nodes on which we should check
8090 @return: the list of vm-capable nodes
8093 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8094 return [name for name in nodenames if name not in vm_nodes]
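# Example (hypothetical names): with nodenames = ["node1", "node2", "node3"]
# and "node2" marked as not vm_capable, this returns ["node1", "node3"], so
# subsequent hypervisor/OS checks only run on nodes that can host instances.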
8097 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8098 """Hypervisor parameter validation.
8100 This function abstracts the hypervisor parameter validation to be
8101 used in both instance create and instance modify.
8103 @type lu: L{LogicalUnit}
8104 @param lu: the logical unit for which we check
8105 @type nodenames: list
8106 @param nodenames: the list of nodes on which we should check
8107 @type hvname: string
8108 @param hvname: the name of the hypervisor we should use
8109 @type hvparams: dict
8110 @param hvparams: the parameters which we need to check
8111 @raise errors.OpPrereqError: if the parameters are not valid
8114 nodenames = _FilterVmNodes(lu, nodenames)
8115 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8118 for node in nodenames:
8122 info.Raise("Hypervisor parameter validation failed on node %s" % node)
8125 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8126 """OS parameters validation.
8128 @type lu: L{LogicalUnit}
8129 @param lu: the logical unit for which we check
8130 @type required: boolean
8131 @param required: whether the validation should fail if the OS is not found
8133 @type nodenames: list
8134 @param nodenames: the list of nodes on which we should check
8135 @type osname: string
8136 @param osname: the name of the OS we should use
8137 @type osparams: dict
8138 @param osparams: the parameters which we need to check
8139 @raise errors.OpPrereqError: if the parameters are not valid
8142 nodenames = _FilterVmNodes(lu, nodenames)
8143 result = lu.rpc.call_os_validate(required, nodenames, osname,
8144 [constants.OS_VALIDATE_PARAMETERS],
8146 for node, nres in result.items():
8147 # we don't check for offline cases since this should be run only
8148 # against the master node and/or an instance's nodes
8149 nres.Raise("OS Parameters validation failed on node %s" % node)
8150 if not nres.payload:
8151 lu.LogInfo("OS %s not found on node %s, validation skipped",
8155 class LUInstanceCreate(LogicalUnit):
8156 """Create an instance.
8159 HPATH = "instance-add"
8160 HTYPE = constants.HTYPE_INSTANCE
8163 def CheckArguments(self):
8167 # do not require name_check to ease forward/backward compatibility
8169 if self.op.no_install and self.op.start:
8170 self.LogInfo("No-installation mode selected, disabling startup")
8171 self.op.start = False
8172 # validate/normalize the instance name
8173 self.op.instance_name = \
8174 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8176 if self.op.ip_check and not self.op.name_check:
8177 # TODO: make the ip check more flexible and not depend on the name check
8178 raise errors.OpPrereqError("Cannot do IP address check without a name"
8179 " check", errors.ECODE_INVAL)
8181 # check nics' parameter names
8182 for nic in self.op.nics:
8183 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8185 # check disks. parameter names and consistent adopt/no-adopt strategy
8186 has_adopt = has_no_adopt = False
8187 for disk in self.op.disks:
8188 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8189 if constants.IDISK_ADOPT in disk:
8193 if has_adopt and has_no_adopt:
8194 raise errors.OpPrereqError("Either all disks are adopted or none is",
8197 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8198 raise errors.OpPrereqError("Disk adoption is not supported for the"
8199 " '%s' disk template" %
8200 self.op.disk_template,
8202 if self.op.iallocator is not None:
8203 raise errors.OpPrereqError("Disk adoption not allowed with an"
8204 " iallocator script", errors.ECODE_INVAL)
8205 if self.op.mode == constants.INSTANCE_IMPORT:
8206 raise errors.OpPrereqError("Disk adoption not allowed for"
8207 " instance import", errors.ECODE_INVAL)
8209 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8210 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8211 " but no 'adopt' parameter given" %
8212 self.op.disk_template,
8215 self.adopt_disks = has_adopt
8217 # instance name verification
8218 if self.op.name_check:
8219 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8220 self.op.instance_name = self.hostname1.name
8221 # used in CheckPrereq for ip ping check
8222 self.check_ip = self.hostname1.ip
8224 self.check_ip = None
8226 # file storage checks
8227 if (self.op.file_driver and
8228 not self.op.file_driver in constants.FILE_DRIVER):
8229 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8230 self.op.file_driver, errors.ECODE_INVAL)
8232 if self.op.disk_template == constants.DT_FILE:
8233 opcodes.RequireFileStorage()
8234 elif self.op.disk_template == constants.DT_SHARED_FILE:
8235 opcodes.RequireSharedFileStorage()
8237 ### Node/iallocator related checks
8238 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8240 if self.op.pnode is not None:
8241 if self.op.disk_template in constants.DTS_INT_MIRROR:
8242 if self.op.snode is None:
8243 raise errors.OpPrereqError("The networked disk templates need"
8244 " a mirror node", errors.ECODE_INVAL)
8246 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8248 self.op.snode = None
8250 self._cds = _GetClusterDomainSecret()
8252 if self.op.mode == constants.INSTANCE_IMPORT:
8253 # On import force_variant must be True, because if we forced it at
8254 # initial install, our only chance when importing it back is that it still works
8256 self.op.force_variant = True
8258 if self.op.no_install:
8259 self.LogInfo("No-installation mode has no effect during import")
8261 elif self.op.mode == constants.INSTANCE_CREATE:
8262 if self.op.os_type is None:
8263 raise errors.OpPrereqError("No guest OS specified",
8265 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8266 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8267 " installation" % self.op.os_type,
8269 if self.op.disk_template is None:
8270 raise errors.OpPrereqError("No disk template specified",
8273 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8274 # Check handshake to ensure both clusters have the same domain secret
8275 src_handshake = self.op.source_handshake
8276 if not src_handshake:
8277 raise errors.OpPrereqError("Missing source handshake",
8280 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8283 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8286 # Load and check source CA
8287 self.source_x509_ca_pem = self.op.source_x509_ca
8288 if not self.source_x509_ca_pem:
8289 raise errors.OpPrereqError("Missing source X509 CA",
8293 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8295 except OpenSSL.crypto.Error, err:
8296 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8297 (err, ), errors.ECODE_INVAL)
8299 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8300 if errcode is not None:
8301 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8304 self.source_x509_ca = cert
8306 src_instance_name = self.op.source_instance_name
8307 if not src_instance_name:
8308 raise errors.OpPrereqError("Missing source instance name",
8311 self.source_instance_name = \
8312 netutils.GetHostname(name=src_instance_name).name
8315 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8316 self.op.mode, errors.ECODE_INVAL)
8318 def ExpandNames(self):
8319 """ExpandNames for CreateInstance.
8321 Figure out the right locks for instance creation.
8324 self.needed_locks = {}
8326 instance_name = self.op.instance_name
8327 # this is just a preventive check, but someone might still add this
8328 # instance in the meantime, and creation will fail at lock-add time
8329 if instance_name in self.cfg.GetInstanceList():
8330 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8331 instance_name, errors.ECODE_EXISTS)
8333 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8335 if self.op.iallocator:
8336 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8338 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8339 nodelist = [self.op.pnode]
8340 if self.op.snode is not None:
8341 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8342 nodelist.append(self.op.snode)
8343 self.needed_locks[locking.LEVEL_NODE] = nodelist
8345 # in case of import lock the source node too
8346 if self.op.mode == constants.INSTANCE_IMPORT:
8347 src_node = self.op.src_node
8348 src_path = self.op.src_path
8350 if src_path is None:
8351 self.op.src_path = src_path = self.op.instance_name
8353 if src_node is None:
8354 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8355 self.op.src_node = None
8356 if os.path.isabs(src_path):
8357 raise errors.OpPrereqError("Importing an instance from a path"
8358 " requires a source node option",
8361 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8362 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8363 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8364 if not os.path.isabs(src_path):
8365 self.op.src_path = src_path = \
8366 utils.PathJoin(constants.EXPORT_DIR, src_path)
8368 def _RunAllocator(self):
8369 """Run the allocator based on input opcode.
8372 nics = [n.ToDict() for n in self.nics]
8373 ial = IAllocator(self.cfg, self.rpc,
8374 mode=constants.IALLOCATOR_MODE_ALLOC,
8375 name=self.op.instance_name,
8376 disk_template=self.op.disk_template,
8379 vcpus=self.be_full[constants.BE_VCPUS],
8380 memory=self.be_full[constants.BE_MEMORY],
8383 hypervisor=self.op.hypervisor,
8386 ial.Run(self.op.iallocator)
8389 raise errors.OpPrereqError("Can't compute nodes using"
8390 " iallocator '%s': %s" %
8391 (self.op.iallocator, ial.info),
8393 if len(ial.result) != ial.required_nodes:
8394 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8395 " of nodes (%s), required %s" %
8396 (self.op.iallocator, len(ial.result),
8397 ial.required_nodes), errors.ECODE_FAULT)
8398 self.op.pnode = ial.result[0]
8399 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8400 self.op.instance_name, self.op.iallocator,
8401 utils.CommaJoin(ial.result))
8402 if ial.required_nodes == 2:
8403 self.op.snode = ial.result[1]
8405 def BuildHooksEnv(self):
8408 This runs on master, primary and secondary nodes of the instance.
8412 "ADD_MODE": self.op.mode,
8414 if self.op.mode == constants.INSTANCE_IMPORT:
8415 env["SRC_NODE"] = self.op.src_node
8416 env["SRC_PATH"] = self.op.src_path
8417 env["SRC_IMAGES"] = self.src_images
8419 env.update(_BuildInstanceHookEnv(
8420 name=self.op.instance_name,
8421 primary_node=self.op.pnode,
8422 secondary_nodes=self.secondaries,
8423 status=self.op.start,
8424 os_type=self.op.os_type,
8425 memory=self.be_full[constants.BE_MEMORY],
8426 vcpus=self.be_full[constants.BE_VCPUS],
8427 nics=_NICListToTuple(self, self.nics),
8428 disk_template=self.op.disk_template,
8429 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8430 for d in self.disks],
8433 hypervisor_name=self.op.hypervisor,
8439 def BuildHooksNodes(self):
8440 """Build hooks nodes.
8443 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8446 def _ReadExportInfo(self):
8447 """Reads the export information from disk.
8449 It will override the opcode source node and path with the actual
8450 information, if these two were not specified before.
8452 @return: the export information
8455 assert self.op.mode == constants.INSTANCE_IMPORT
8457 src_node = self.op.src_node
8458 src_path = self.op.src_path
8460 if src_node is None:
8461 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8462 exp_list = self.rpc.call_export_list(locked_nodes)
8464 for node in exp_list:
8465 if exp_list[node].fail_msg:
8467 if src_path in exp_list[node].payload:
8469 self.op.src_node = src_node = node
8470 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8474 raise errors.OpPrereqError("No export found for relative path %s" %
8475 src_path, errors.ECODE_INVAL)
8477 _CheckNodeOnline(self, src_node)
8478 result = self.rpc.call_export_info(src_node, src_path)
8479 result.Raise("No export or invalid export found in dir %s" % src_path)
8481 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8482 if not export_info.has_section(constants.INISECT_EXP):
8483 raise errors.ProgrammerError("Corrupted export config",
8484 errors.ECODE_ENVIRON)
8486 ei_version = export_info.get(constants.INISECT_EXP, "version")
8487 if (int(ei_version) != constants.EXPORT_VERSION):
8488 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8489 (ei_version, constants.EXPORT_VERSION),
8490 errors.ECODE_ENVIRON)
8493 def _ReadExportParams(self, einfo):
8494 """Use export parameters as defaults.
8496 In case the opcode doesn't specify (as in override) some instance
8497 parameters, then try to use them from the export information, if
8501 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8503 if self.op.disk_template is None:
8504 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8505 self.op.disk_template = einfo.get(constants.INISECT_INS,
8508 raise errors.OpPrereqError("No disk template specified and the export"
8509 " is missing the disk_template information",
8512 if not self.op.disks:
8513 if einfo.has_option(constants.INISECT_INS, "disk_count"):
8515 # TODO: import the disk iv_name too
8516 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8517 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8518 disks.append({constants.IDISK_SIZE: disk_sz})
8519 self.op.disks = disks
8521 raise errors.OpPrereqError("No disk info specified and the export"
8522 " is missing the disk information",
8525 if (not self.op.nics and
8526 einfo.has_option(constants.INISECT_INS, "nic_count")):
8528 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8530 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8531 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8536 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8537 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8539 if (self.op.hypervisor is None and
8540 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8541 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8543 if einfo.has_section(constants.INISECT_HYP):
8544 # use the export parameters but do not override the ones
8545 # specified by the user
8546 for name, value in einfo.items(constants.INISECT_HYP):
8547 if name not in self.op.hvparams:
8548 self.op.hvparams[name] = value
8550 if einfo.has_section(constants.INISECT_BEP):
8551 # use the parameters, without overriding
8552 for name, value in einfo.items(constants.INISECT_BEP):
8553 if name not in self.op.beparams:
8554 self.op.beparams[name] = value
8556 # try to read the parameters old style, from the main section
8557 for name in constants.BES_PARAMETERS:
8558 if (name not in self.op.beparams and
8559 einfo.has_option(constants.INISECT_INS, name)):
8560 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8562 if einfo.has_section(constants.INISECT_OSP):
8563 # use the parameters, without overriding
8564 for name, value in einfo.items(constants.INISECT_OSP):
8565 if name not in self.op.osparams:
8566 self.op.osparams[name] = value
8568 def _RevertToDefaults(self, cluster):
8569 """Revert the instance parameters to the default values.
8573 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8574 for name in self.op.hvparams.keys():
8575 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8576 del self.op.hvparams[name]
8578 be_defs = cluster.SimpleFillBE({})
8579 for name in self.op.beparams.keys():
8580 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8581 del self.op.beparams[name]
8583 nic_defs = cluster.SimpleFillNIC({})
8584 for nic in self.op.nics:
8585 for name in constants.NICS_PARAMETERS:
8586 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8589 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8590 for name in self.op.osparams.keys():
8591 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8592 del self.op.osparams[name]
8594 def _CalculateFileStorageDir(self):
8595 """Calculate final instance file storage dir.
8598 # file storage dir calculation/check
8599 self.instance_file_storage_dir = None
8600 if self.op.disk_template in constants.DTS_FILEBASED:
8601 # build the full file storage dir path
8604 if self.op.disk_template == constants.DT_SHARED_FILE:
8605 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8607 get_fsd_fn = self.cfg.GetFileStorageDir
8609 cfg_storagedir = get_fsd_fn()
8610 if not cfg_storagedir:
8611 raise errors.OpPrereqError("Cluster file storage dir not defined")
8612 joinargs.append(cfg_storagedir)
8614 if self.op.file_storage_dir is not None:
8615 joinargs.append(self.op.file_storage_dir)
8617 joinargs.append(self.op.instance_name)
8619 # pylint: disable=W0142
8620 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8622 def CheckPrereq(self):
8623 """Check prerequisites.
8626 self._CalculateFileStorageDir()
8628 if self.op.mode == constants.INSTANCE_IMPORT:
8629 export_info = self._ReadExportInfo()
8630 self._ReadExportParams(export_info)
8632 if (not self.cfg.GetVGName() and
8633 self.op.disk_template not in constants.DTS_NOT_LVM):
8634 raise errors.OpPrereqError("Cluster does not support lvm-based"
8635 " instances", errors.ECODE_STATE)
8637 if self.op.hypervisor is None:
8638 self.op.hypervisor = self.cfg.GetHypervisorType()
8640 cluster = self.cfg.GetClusterInfo()
8641 enabled_hvs = cluster.enabled_hypervisors
8642 if self.op.hypervisor not in enabled_hvs:
8643 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8644 " cluster (%s)" % (self.op.hypervisor,
8645 ",".join(enabled_hvs)),
8648 # Check tag validity
8649 for tag in self.op.tags:
8650 objects.TaggableObject.ValidateTag(tag)
8652 # check hypervisor parameter syntax (locally)
8653 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8654 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8656 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8657 hv_type.CheckParameterSyntax(filled_hvp)
8658 self.hv_full = filled_hvp
8659 # check that we don't specify global parameters on an instance
8660 _CheckGlobalHvParams(self.op.hvparams)
8662 # fill and remember the beparams dict
8663 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8664 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8666 # build os parameters
8667 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8669 # now that hvp/bep are in final format, let's reset to defaults,
8671 if self.op.identify_defaults:
8672 self._RevertToDefaults(cluster)
8676 for idx, nic in enumerate(self.op.nics):
8677 nic_mode_req = nic.get(constants.INIC_MODE, None)
8678 nic_mode = nic_mode_req
8679 if nic_mode is None:
8680 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8682 # in routed mode, for the first nic, the default ip is 'auto'
8683 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8684 default_ip_mode = constants.VALUE_AUTO
8686 default_ip_mode = constants.VALUE_NONE
8688 # ip validity checks
8689 ip = nic.get(constants.INIC_IP, default_ip_mode)
8690 if ip is None or ip.lower() == constants.VALUE_NONE:
8692 elif ip.lower() == constants.VALUE_AUTO:
8693 if not self.op.name_check:
8694 raise errors.OpPrereqError("IP address set to auto but name checks"
8695 " have been skipped",
8697 nic_ip = self.hostname1.ip
8699 if not netutils.IPAddress.IsValid(ip):
8700 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8704 # TODO: check the ip address for uniqueness
8705 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8706 raise errors.OpPrereqError("Routed nic mode requires an ip address",
8709 # MAC address verification
8710 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8711 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8712 mac = utils.NormalizeAndValidateMac(mac)
8715 self.cfg.ReserveMAC(mac, self.proc.GetECId())
8716 except errors.ReservationError:
8717 raise errors.OpPrereqError("MAC address %s already in use"
8718 " in cluster" % mac,
8719 errors.ECODE_NOTUNIQUE)
8721 # Build nic parameters
8722 link = nic.get(constants.INIC_LINK, None)
8725 nicparams[constants.NIC_MODE] = nic_mode_req
8727 nicparams[constants.NIC_LINK] = link
8729 check_params = cluster.SimpleFillNIC(nicparams)
8730 objects.NIC.CheckParameterSyntax(check_params)
8731 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8733 # disk checks/pre-build
8734 default_vg = self.cfg.GetVGName()
8736 for disk in self.op.disks:
8737 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8738 if mode not in constants.DISK_ACCESS_SET:
8739 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8740 mode, errors.ECODE_INVAL)
8741 size = disk.get(constants.IDISK_SIZE, None)
8743 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8746 except (TypeError, ValueError):
8747 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8750 data_vg = disk.get(constants.IDISK_VG, default_vg)
8752 constants.IDISK_SIZE: size,
8753 constants.IDISK_MODE: mode,
8754 constants.IDISK_VG: data_vg,
8755 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8757 if constants.IDISK_ADOPT in disk:
8758 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8759 self.disks.append(new_disk)
8761 if self.op.mode == constants.INSTANCE_IMPORT:
8763 # Check that the new instance doesn't have less disks than the export
8764 instance_disks = len(self.disks)
8765 export_disks = export_info.getint(constants.INISECT_INS, "disk_count")
8766 if instance_disks < export_disks:
8767 raise errors.OpPrereqError("Not enough disks to import."
8768 " (instance: %d, export: %d)" %
8769 (instance_disks, export_disks),
8773 for idx in range(export_disks):
8774 option = "disk%d_dump" % idx
8775 if export_info.has_option(constants.INISECT_INS, option):
8776 # FIXME: are the old os-es, disk sizes, etc. useful?
8777 export_name = export_info.get(constants.INISECT_INS, option)
8778 image = utils.PathJoin(self.op.src_path, export_name)
8779 disk_images.append(image)
8781 disk_images.append(False)
8783 self.src_images = disk_images
8785 old_name = export_info.get(constants.INISECT_INS, "name")
8787 exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8788 except (TypeError, ValueError), err:
8789 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8790 " an integer: %s" % str(err),
8792 if self.op.instance_name == old_name:
8793 for idx, nic in enumerate(self.nics):
8794 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8795 nic_mac_ini = "nic%d_mac" % idx
8796 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8798 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8800 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8801 if self.op.ip_check:
8802 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8803 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8804 (self.check_ip, self.op.instance_name),
8805 errors.ECODE_NOTUNIQUE)
8807 #### mac address generation
8808 # By generating the MAC address here, both the allocator and the hooks get
8809 # the real final MAC address rather than the 'auto' or 'generate' value.
8810 # There is a race condition between the generation and the instance object
8811 # creation, which means that we know the mac is valid now, but we're not
8812 # sure it will be when we actually add the instance. If things go bad
8813 # adding the instance will abort because of a duplicate mac, and the
8814 # creation job will fail.
8815 for nic in self.nics:
8816 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8817 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8821 if self.op.iallocator is not None:
8822 self._RunAllocator()
8824 # Release all unneeded node locks
8825 _ReleaseLocks(self, locking.LEVEL_NODE,
8826 keep=filter(None, [self.op.pnode, self.op.snode,
8829 #### node related checks
8831 # check primary node
8832 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8833 assert self.pnode is not None, \
8834 "Cannot retrieve locked node %s" % self.op.pnode
8836 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8837 pnode.name, errors.ECODE_STATE)
8839 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8840 pnode.name, errors.ECODE_STATE)
8841 if not pnode.vm_capable:
8842 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8843 " '%s'" % pnode.name, errors.ECODE_STATE)
8845 self.secondaries = []
8847 # mirror node verification
8848 if self.op.disk_template in constants.DTS_INT_MIRROR:
8849 if self.op.snode == pnode.name:
8850 raise errors.OpPrereqError("The secondary node cannot be the"
8851 " primary node", errors.ECODE_INVAL)
8852 _CheckNodeOnline(self, self.op.snode)
8853 _CheckNodeNotDrained(self, self.op.snode)
8854 _CheckNodeVmCapable(self, self.op.snode)
8855 self.secondaries.append(self.op.snode)
8857 nodenames = [pnode.name] + self.secondaries
8859 if not self.adopt_disks:
8860 # Check lv size requirements, if not adopting
8861 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8862 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8864 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8865 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8866 disk[constants.IDISK_ADOPT])
8867 for disk in self.disks])
8868 if len(all_lvs) != len(self.disks):
8869 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8871 for lv_name in all_lvs:
8873 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8874 # to ReserveLV uses the same syntax
8875 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8876 except errors.ReservationError:
8877 raise errors.OpPrereqError("LV named %s used by another instance" %
8878 lv_name, errors.ECODE_NOTUNIQUE)
8880 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8881 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8883 node_lvs = self.rpc.call_lv_list([pnode.name],
8884 vg_names.payload.keys())[pnode.name]
8885 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8886 node_lvs = node_lvs.payload
8888 delta = all_lvs.difference(node_lvs.keys())
8890 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8891 utils.CommaJoin(delta),
8893 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8895 raise errors.OpPrereqError("Online logical volumes found, cannot"
8896 " adopt: %s" % utils.CommaJoin(online_lvs),
8898 # update the size of disk based on what is found
8899 for dsk in self.disks:
8900 dsk[constants.IDISK_SIZE] = \
8901 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8902 dsk[constants.IDISK_ADOPT])][0]))
8904 elif self.op.disk_template == constants.DT_BLOCK:
8905 # Normalize and de-duplicate device paths
8906 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8907 for disk in self.disks])
8908 if len(all_disks) != len(self.disks):
8909 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8911 baddisks = [d for d in all_disks
8912 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8914 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8915 " cannot be adopted" %
8916 (", ".join(baddisks),
8917 constants.ADOPTABLE_BLOCKDEV_ROOT),
8920 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8921 list(all_disks))[pnode.name]
8922 node_disks.Raise("Cannot get block device information from node %s" %
8924 node_disks = node_disks.payload
8925 delta = all_disks.difference(node_disks.keys())
8927 raise errors.OpPrereqError("Missing block device(s): %s" %
8928 utils.CommaJoin(delta),
8930 for dsk in self.disks:
8931 dsk[constants.IDISK_SIZE] = \
8932 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8934 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8936 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8937 # check OS parameters (remotely)
8938 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8940 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8942 # memory check on primary node
8944 _CheckNodeFreeMemory(self, self.pnode.name,
8945 "creating instance %s" % self.op.instance_name,
8946 self.be_full[constants.BE_MEMORY],
8949 self.dry_run_result = list(nodenames)
8951 def Exec(self, feedback_fn):
8952 """Create and add the instance to the cluster.
8955 instance = self.op.instance_name
8956 pnode_name = self.pnode.name
8958 ht_kind = self.op.hypervisor
8959 if ht_kind in constants.HTS_REQ_PORT:
8960 network_port = self.cfg.AllocatePort()
8964 disks = _GenerateDiskTemplate(self,
8965 self.op.disk_template,
8966 instance, pnode_name,
8969 self.instance_file_storage_dir,
8970 self.op.file_driver,
8974 iobj = objects.Instance(name=instance, os=self.op.os_type,
8975 primary_node=pnode_name,
8976 nics=self.nics, disks=disks,
8977 disk_template=self.op.disk_template,
8979 network_port=network_port,
8980 beparams=self.op.beparams,
8981 hvparams=self.op.hvparams,
8982 hypervisor=self.op.hypervisor,
8983 osparams=self.op.osparams,
8987 for tag in self.op.tags:
8990 if self.adopt_disks:
8991 if self.op.disk_template == constants.DT_PLAIN:
8992 # rename LVs to the newly-generated names; we need to construct
8993 # 'fake' LV disks with the old data, plus the new unique_id
8994 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8996 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8997 rename_to.append(t_dsk.logical_id)
8998 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8999 self.cfg.SetDiskID(t_dsk, pnode_name)
9000 result = self.rpc.call_blockdev_rename(pnode_name,
9001 zip(tmp_disks, rename_to))
9002 result.Raise("Failed to rename adopted LVs")
9004 feedback_fn("* creating instance disks...")
9006 _CreateDisks(self, iobj)
9007 except errors.OpExecError:
9008 self.LogWarning("Device creation failed, reverting...")
9010 _RemoveDisks(self, iobj)
9012 self.cfg.ReleaseDRBDMinors(instance)
9015 feedback_fn("adding instance %s to cluster config" % instance)
9017 self.cfg.AddInstance(iobj, self.proc.GetECId())
9019 # Declare that we don't want to remove the instance lock anymore, as we've
9020 # added the instance to the config
9021 del self.remove_locks[locking.LEVEL_INSTANCE]
9023 if self.op.mode == constants.INSTANCE_IMPORT:
9024 # Release unused nodes
9025 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9028 _ReleaseLocks(self, locking.LEVEL_NODE)
9031 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9032 feedback_fn("* wiping instance disks...")
9034 _WipeDisks(self, iobj)
9035 except errors.OpExecError, err:
9036 logging.exception("Wiping disks failed")
9037 self.LogWarning("Wiping instance disks failed (%s)", err)
9041 # Something is already wrong with the disks, don't do anything else
9043 elif self.op.wait_for_sync:
9044 disk_abort = not _WaitForSync(self, iobj)
9045 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9046 # make sure the disks are not degraded (still sync-ing is ok)
9047 feedback_fn("* checking mirrors status")
9048 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9053 _RemoveDisks(self, iobj)
9054 self.cfg.RemoveInstance(iobj.name)
9055 # Make sure the instance lock gets removed
9056 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9057 raise errors.OpExecError("There are some degraded disks for"
9060 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9061 if self.op.mode == constants.INSTANCE_CREATE:
9062 if not self.op.no_install:
9063 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9064 not self.op.wait_for_sync)
9066 feedback_fn("* pausing disk sync to install instance OS")
9067 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9069 for idx, success in enumerate(result.payload):
9071 logging.warn("pause-sync of instance %s for disk %d failed",
9074 feedback_fn("* running the instance OS create scripts...")
9075 # FIXME: pass debug option from opcode to backend
9077 self.rpc.call_instance_os_add(pnode_name, iobj, False,
9078 self.op.debug_level)
9080 feedback_fn("* resuming disk sync")
9081 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9083 for idx, success in enumerate(result.payload):
9085 logging.warn("resume-sync of instance %s for disk %d failed",
9088 os_add_result.Raise("Could not add os for instance %s"
9089 " on node %s" % (instance, pnode_name))
9091 elif self.op.mode == constants.INSTANCE_IMPORT:
9092 feedback_fn("* running the instance OS import scripts...")
9096 for idx, image in enumerate(self.src_images):
9100 # FIXME: pass debug option from opcode to backend
9101 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9102 constants.IEIO_FILE, (image, ),
9103 constants.IEIO_SCRIPT,
9104 (iobj.disks[idx], idx),
9106 transfers.append(dt)
9109 masterd.instance.TransferInstanceData(self, feedback_fn,
9110 self.op.src_node, pnode_name,
9111 self.pnode.secondary_ip,
9113 if not compat.all(import_result):
9114 self.LogWarning("Some disks for instance %s on node %s were not"
9115 " imported successfully" % (instance, pnode_name))
9117 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9118 feedback_fn("* preparing remote import...")
9119 # The source cluster will stop the instance before attempting to make a
9120 # connection. In some cases stopping an instance can take a long time,
9121 # hence the shutdown timeout is added to the connection timeout.
9122 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9123 self.op.source_shutdown_timeout)
9124 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9126 assert iobj.primary_node == self.pnode.name
9128 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9129 self.source_x509_ca,
9130 self._cds, timeouts)
9131 if not compat.all(disk_results):
9132 # TODO: Should the instance still be started, even if some disks
9133 # failed to import (valid for local imports, too)?
9134 self.LogWarning("Some disks for instance %s on node %s were not"
9135 " imported successfully" % (instance, pnode_name))
9137 # Run rename script on newly imported instance
9138 assert iobj.name == instance
9139 feedback_fn("Running rename script for %s" % instance)
9140 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9141 self.source_instance_name,
9142 self.op.debug_level)
9144 self.LogWarning("Failed to run rename script for %s on node"
9145 " %s: %s" % (instance, pnode_name, result.fail_msg))
9148 # also checked in the prereq part
9149 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9153 iobj.admin_up = True
9154 self.cfg.Update(iobj, feedback_fn)
9155 logging.info("Starting instance %s on node %s", instance, pnode_name)
9156 feedback_fn("* starting instance...")
9157 result = self.rpc.call_instance_start(pnode_name, iobj,
9159 result.Raise("Could not start instance")
9161 return list(iobj.all_nodes)
9164 class LUInstanceConsole(NoHooksLU):
9165 """Connect to an instance's console.
9167 This is somewhat special in that it returns the command line that
9168 you need to run on the master node in order to connect to the
9174 def ExpandNames(self):
9175 self._ExpandAndLockInstance()
9177 def CheckPrereq(self):
9178 """Check prerequisites.
9180 This checks that the instance is in the cluster.
9183 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9184 assert self.instance is not None, \
9185 "Cannot retrieve locked instance %s" % self.op.instance_name
9186 _CheckNodeOnline(self, self.instance.primary_node)
9188 def Exec(self, feedback_fn):
9189 """Connect to the console of an instance
9192 instance = self.instance
9193 node = instance.primary_node
9195 node_insts = self.rpc.call_instance_list([node],
9196 [instance.hypervisor])[node]
9197 node_insts.Raise("Can't get node information from %s" % node)
9199 if instance.name not in node_insts.payload:
9200 if instance.admin_up:
9201 state = constants.INSTST_ERRORDOWN
9203 state = constants.INSTST_ADMINDOWN
9204 raise errors.OpExecError("Instance %s is not running (state %s)" %
9205 (instance.name, state))
9207 logging.debug("Connecting to console of %s on %s", instance.name, node)
9209 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9212 def _GetInstanceConsole(cluster, instance):
9213 """Returns console information for an instance.
9215 @type cluster: L{objects.Cluster}
9216 @type instance: L{objects.Instance}
9220 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9221 # beparams and hvparams are passed separately, to avoid editing the
9222 # instance and then saving the defaults in the instance itself.
9223 hvparams = cluster.FillHV(instance)
9224 beparams = cluster.FillBE(instance)
9225 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9227 assert console.instance == instance.name
9228 assert console.Validate()
9230 return console.ToDict()
9233 class LUInstanceReplaceDisks(LogicalUnit):
9234 """Replace the disks of an instance.
9237 HPATH = "mirrors-replace"
9238 HTYPE = constants.HTYPE_INSTANCE
9241 def CheckArguments(self):
9242 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9245 def ExpandNames(self):
9246 self._ExpandAndLockInstance()
9248 assert locking.LEVEL_NODE not in self.needed_locks
9249 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9251 assert self.op.iallocator is None or self.op.remote_node is None, \
9252 "Conflicting options"
9254 if self.op.remote_node is not None:
9255 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9257 # Warning: do not remove the locking of the new secondary here
9258 # unless DRBD8.AddChildren is changed to work in parallel;
9259 # currently it doesn't since parallel invocations of
9260 # FindUnusedMinor will conflict
9261 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9262 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9264 self.needed_locks[locking.LEVEL_NODE] = []
9265 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9267 if self.op.iallocator is not None:
9268 # iallocator will select a new node in the same group
9269 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9271 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9272 self.op.iallocator, self.op.remote_node,
9273 self.op.disks, False, self.op.early_release)
9275 self.tasklets = [self.replacer]
9277 def DeclareLocks(self, level):
9278 if level == locking.LEVEL_NODEGROUP:
9279 assert self.op.remote_node is None
9280 assert self.op.iallocator is not None
9281 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9283 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9284 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9285 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9287 elif level == locking.LEVEL_NODE:
9288 if self.op.iallocator is not None:
9289 assert self.op.remote_node is None
9290 assert not self.needed_locks[locking.LEVEL_NODE]
9292 # Lock member nodes of all locked groups
9293 self.needed_locks[locking.LEVEL_NODE] = [node_name
9294 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9295 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9297 self._LockInstancesNodes()
9299 def BuildHooksEnv(self):
9302 This runs on the master, the primary and all the secondaries.
9305 instance = self.replacer.instance
9307 "MODE": self.op.mode,
9308 "NEW_SECONDARY": self.op.remote_node,
9309 "OLD_SECONDARY": instance.secondary_nodes[0],
9311 env.update(_BuildInstanceHookEnvByObject(self, instance))
9314 def BuildHooksNodes(self):
9315 """Build hooks nodes.
9318 instance = self.replacer.instance
9320 self.cfg.GetMasterNode(),
9321 instance.primary_node,
9323 if self.op.remote_node is not None:
9324 nl.append(self.op.remote_node)
9327 def CheckPrereq(self):
9328 """Check prerequisites.
9331 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9332 self.op.iallocator is None)
9334 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9336 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9338 return LogicalUnit.CheckPrereq(self)
9341 class TLReplaceDisks(Tasklet):
9342 """Replaces disks for an instance.
9344 Note: Locking is not within the scope of this class.
9347 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9348 disks, delay_iallocator, early_release):
9349 """Initializes this class.
9352 Tasklet.__init__(self, lu)
9355 self.instance_name = instance_name
9357 self.iallocator_name = iallocator_name
9358 self.remote_node = remote_node
9360 self.delay_iallocator = delay_iallocator
9361 self.early_release = early_release
9364 self.instance = None
9365 self.new_node = None
9366 self.target_node = None
9367 self.other_node = None
9368 self.remote_node_info = None
9369 self.node_secondary_ip = None
9372 def CheckArguments(mode, remote_node, iallocator):
9373 """Helper function for users of this class.
9376 # check for valid parameter combination
9377 if mode == constants.REPLACE_DISK_CHG:
9378 if remote_node is None and iallocator is None:
9379 raise errors.OpPrereqError("When changing the secondary either an"
9380 " iallocator script must be used or the"
9381 " new node given", errors.ECODE_INVAL)
9383 if remote_node is not None and iallocator is not None:
9384 raise errors.OpPrereqError("Give either the iallocator or the new"
9385 " secondary, not both", errors.ECODE_INVAL)
9387 elif remote_node is not None or iallocator is not None:
9388 # Not replacing the secondary
9389 raise errors.OpPrereqError("The iallocator and new node options can"
9390 " only be used when changing the"
9391 " secondary node", errors.ECODE_INVAL)
9394 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9395 """Compute a new secondary node using an IAllocator.
9398 ial = IAllocator(lu.cfg, lu.rpc,
9399 mode=constants.IALLOCATOR_MODE_RELOC,
9401 relocate_from=list(relocate_from))
9403 ial.Run(iallocator_name)
9406 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9407 " %s" % (iallocator_name, ial.info),
9410 if len(ial.result) != ial.required_nodes:
9411 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9412 " of nodes (%s), required %s" %
9414 len(ial.result), ial.required_nodes),
9417 remote_node_name = ial.result[0]
9419 lu.LogInfo("Selected new secondary for instance '%s': %s",
9420 instance_name, remote_node_name)
9422 return remote_node_name
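# Illustrative note (assumption, not from the original code): for the
# IALLOCATOR_MODE_RELOC request built above, ial.result is a list of node
# names whose length must equal ial.required_nodes (a single node when
# changing a DRBD secondary), e.g.
#   ial.result == ["node3.example.com"]
# which is why only ial.result[0] is used.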
9424 def _FindFaultyDisks(self, node_name):
9425 """Wrapper for L{_FindFaultyInstanceDisks}.
9428 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9431 def _CheckDisksActivated(self, instance):
9432 """Checks if the instance disks are activated.
9434 @param instance: The instance to check disks
9435 @return: True if they are activated, False otherwise
9438 nodes = instance.all_nodes
9440 for idx, dev in enumerate(instance.disks):
9442 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9443 self.cfg.SetDiskID(dev, node)
9445 result = self.rpc.call_blockdev_find(node, dev)
9449 elif result.fail_msg or not result.payload:
9454 def CheckPrereq(self):
9455 """Check prerequisites.
9457 This checks that the instance is in the cluster.
9460 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9461 assert instance is not None, \
9462 "Cannot retrieve locked instance %s" % self.instance_name
9464 if instance.disk_template != constants.DT_DRBD8:
9465 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9466 " instances", errors.ECODE_INVAL)
9468 if len(instance.secondary_nodes) != 1:
9469 raise errors.OpPrereqError("The instance has a strange layout,"
9470 " expected one secondary but found %d" %
9471 len(instance.secondary_nodes),
9474 if not self.delay_iallocator:
9475 self._CheckPrereq2()
9477 def _CheckPrereq2(self):
9478 """Check prerequisites, second part.
9480 This function should always be part of CheckPrereq. It was separated and is
9481 now called from Exec because during node evacuation iallocator was only
9482 called with an unmodified cluster model, not taking planned changes into account.
9486 instance = self.instance
9487 secondary_node = instance.secondary_nodes[0]
9489 if self.iallocator_name is None:
9490 remote_node = self.remote_node
9492 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9493 instance.name, instance.secondary_nodes)
9495 if remote_node is None:
9496 self.remote_node_info = None
9498 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9499 "Remote node '%s' is not locked" % remote_node
9501 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9502 assert self.remote_node_info is not None, \
9503 "Cannot retrieve locked node %s" % remote_node
9505 if remote_node == self.instance.primary_node:
9506 raise errors.OpPrereqError("The specified node is the primary node of"
9507 " the instance", errors.ECODE_INVAL)
9509 if remote_node == secondary_node:
9510 raise errors.OpPrereqError("The specified node is already the"
9511 " secondary node of the instance",
9514 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9515 constants.REPLACE_DISK_CHG):
9516 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9519 if self.mode == constants.REPLACE_DISK_AUTO:
9520 if not self._CheckDisksActivated(instance):
9521 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9522 " first" % self.instance_name,
9524 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9525 faulty_secondary = self._FindFaultyDisks(secondary_node)
9527 if faulty_primary and faulty_secondary:
9528 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9529 " one node and can not be repaired"
9530 " automatically" % self.instance_name,
9534 self.disks = faulty_primary
9535 self.target_node = instance.primary_node
9536 self.other_node = secondary_node
9537 check_nodes = [self.target_node, self.other_node]
9538 elif faulty_secondary:
9539 self.disks = faulty_secondary
9540 self.target_node = secondary_node
9541 self.other_node = instance.primary_node
9542 check_nodes = [self.target_node, self.other_node]
9548 # Non-automatic modes
9549 if self.mode == constants.REPLACE_DISK_PRI:
9550 self.target_node = instance.primary_node
9551 self.other_node = secondary_node
9552 check_nodes = [self.target_node, self.other_node]
9554 elif self.mode == constants.REPLACE_DISK_SEC:
9555 self.target_node = secondary_node
9556 self.other_node = instance.primary_node
9557 check_nodes = [self.target_node, self.other_node]
9559 elif self.mode == constants.REPLACE_DISK_CHG:
9560 self.new_node = remote_node
9561 self.other_node = instance.primary_node
9562 self.target_node = secondary_node
9563 check_nodes = [self.new_node, self.other_node]
9565 _CheckNodeNotDrained(self.lu, remote_node)
9566 _CheckNodeVmCapable(self.lu, remote_node)
9568 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9569 assert old_node_info is not None
9570 if old_node_info.offline and not self.early_release:
9571 # doesn't make sense to delay the release
9572 self.early_release = True
9573 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9574 " early-release mode", secondary_node)
9577 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9580 # If not specified all disks should be replaced
9582 self.disks = range(len(self.instance.disks))
9584 for node in check_nodes:
9585 _CheckNodeOnline(self.lu, node)
9587 touched_nodes = frozenset(node_name for node_name in [self.new_node,
9590 if node_name is not None)
9592 # Release unneeded node locks
9593 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9595 # Release any owned node group
9596 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9597 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9599 # Check whether disks are valid
9600 for disk_idx in self.disks:
9601 instance.FindDisk(disk_idx)
9603 # Get secondary node IP addresses
9604 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9605 in self.cfg.GetMultiNodeInfo(touched_nodes))
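# Illustrative sketch (addresses are made up): node_secondary_ip maps the
# touched node names to their secondary (replication network) addresses, e.g.
#   {"node1.example.com": "192.0.2.1", "node2.example.com": "192.0.2.2"}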
9607 def Exec(self, feedback_fn):
9608 """Execute disk replacement.
9610 This dispatches the disk replacement to the appropriate handler.
9613 if self.delay_iallocator:
9614 self._CheckPrereq2()
9617 # Verify owned locks before starting operation
9618 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9619 assert set(owned_nodes) == set(self.node_secondary_ip), \
9620 ("Incorrect node locks, owning %s, expected %s" %
9621 (owned_nodes, self.node_secondary_ip.keys()))
9623 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9624 assert list(owned_instances) == [self.instance_name], \
9625 "Instance '%s' not locked" % self.instance_name
9627 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9628 "Should not own any node group lock at this point"
9631 feedback_fn("No disks need replacement")
9634 feedback_fn("Replacing disk(s) %s for %s" %
9635 (utils.CommaJoin(self.disks), self.instance.name))
9637 activate_disks = (not self.instance.admin_up)
9639 # Activate the instance disks if we're replacing them on a down instance
9641 _StartInstanceDisks(self.lu, self.instance, True)
9644 # Should we replace the secondary node?
9645 if self.new_node is not None:
9646 fn = self._ExecDrbd8Secondary
9648 fn = self._ExecDrbd8DiskOnly
9650 result = fn(feedback_fn)
9652 # Deactivate the instance disks if we're replacing them on a down instance
9655 _SafeShutdownInstanceDisks(self.lu, self.instance)
9658 # Verify owned locks
9659 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9660 nodes = frozenset(self.node_secondary_ip)
9661 assert ((self.early_release and not owned_nodes) or
9662 (not self.early_release and not (set(owned_nodes) - nodes))), \
9663 ("Not owning the correct locks, early_release=%s, owned=%r,"
9664 " nodes=%r" % (self.early_release, owned_nodes, nodes))
9668 def _CheckVolumeGroup(self, nodes):
9669 self.lu.LogInfo("Checking volume groups")
9671 vgname = self.cfg.GetVGName()
9673 # Make sure volume group exists on all involved nodes
9674 results = self.rpc.call_vg_list(nodes)
9676 raise errors.OpExecError("Can't list volume groups on the nodes")
9680 res.Raise("Error checking node %s" % node)
9681 if vgname not in res.payload:
9682 raise errors.OpExecError("Volume group '%s' not found on node %s" %
9685 def _CheckDisksExistence(self, nodes):
9686 # Check disk existence
9687 for idx, dev in enumerate(self.instance.disks):
9688 if idx not in self.disks:
9692 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9693 self.cfg.SetDiskID(dev, node)
9695 result = self.rpc.call_blockdev_find(node, dev)
9697 msg = result.fail_msg
9698 if msg or not result.payload:
9700 msg = "disk not found"
9701 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9704 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9705 for idx, dev in enumerate(self.instance.disks):
9706 if idx not in self.disks:
9709 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9712 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9714 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9715 " replace disks for instance %s" %
9716 (node_name, self.instance.name))
9718 def _CreateNewStorage(self, node_name):
9719 """Create new storage on the primary or secondary node.
9721 This is only used for same-node replaces, not for changing the
9722 secondary node, hence we don't want to modify the existing disk.
9727 for idx, dev in enumerate(self.instance.disks):
9728 if idx not in self.disks:
9731 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9733 self.cfg.SetDiskID(dev, node_name)
9735 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9736 names = _GenerateUniqueNames(self.lu, lv_names)
9738 vg_data = dev.children[0].logical_id[0]
9739 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9740 logical_id=(vg_data, names[0]))
9741 vg_meta = dev.children[1].logical_id[0]
9742 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9743 logical_id=(vg_meta, names[1]))
9745 new_lvs = [lv_data, lv_meta]
9746 old_lvs = [child.Copy() for child in dev.children]
9747 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9749 # we pass force_create=True to force the LVM creation
9750 for new_lv in new_lvs:
9751 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9752 _GetInstanceInfoText(self.instance), False)
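# Illustrative sketch (names are made up): by the time this helper returns,
# iv_names maps each DRBD device's iv_name to its old and new LV children,
# e.g.
#   {"disk/0": (drbd_dev, [old_data_lv, old_meta_lv], [new_data_lv, new_meta_lv])}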
9756 def _CheckDevices(self, node_name, iv_names):
9757 for name, (dev, _, _) in iv_names.iteritems():
9758 self.cfg.SetDiskID(dev, node_name)
9760 result = self.rpc.call_blockdev_find(node_name, dev)
9762 msg = result.fail_msg
9763 if msg or not result.payload:
9765 msg = "disk not found"
9766 raise errors.OpExecError("Can't find DRBD device %s: %s" %
9769 if result.payload.is_degraded:
9770 raise errors.OpExecError("DRBD device %s is degraded!" % name)
9772 def _RemoveOldStorage(self, node_name, iv_names):
9773 for name, (_, old_lvs, _) in iv_names.iteritems():
9774 self.lu.LogInfo("Remove logical volumes for %s" % name)
9777 self.cfg.SetDiskID(lv, node_name)
9779 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9781 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9782 hint="remove unused LVs manually")
9784 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9785 """Replace a disk on the primary or secondary for DRBD 8.
9787 The algorithm for replace is quite complicated:
9789 1. for each disk to be replaced:
9791 1. create new LVs on the target node with unique names
9792 1. detach old LVs from the drbd device
9793 1. rename old LVs to name_replaced.<time_t>
9794 1. rename new LVs to old LVs
9795 1. attach the new LVs (with the old names now) to the drbd device
9797 1. wait for sync across all devices
9799 1. for each modified disk:
9801 1. remove old LVs (which have the name name_replaced.<time_t>)
9803 Failures are not very well handled.
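As an illustration only (LV names are simplified), for one disk the rename
sequence on the target node looks like:

  old_data_lv -> old_data_lv_replaced-<time_t>
  new_data_lv -> old_data_lv

after which the new LV (now carrying the old name) is re-attached to the
drbd device and the "_replaced" LV is removed once the device has synced.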
9808 # Step: check device activation
9809 self.lu.LogStep(1, steps_total, "Check device existence")
9810 self._CheckDisksExistence([self.other_node, self.target_node])
9811 self._CheckVolumeGroup([self.target_node, self.other_node])
9813 # Step: check other node consistency
9814 self.lu.LogStep(2, steps_total, "Check peer consistency")
9815 self._CheckDisksConsistency(self.other_node,
9816 self.other_node == self.instance.primary_node,
9819 # Step: create new storage
9820 self.lu.LogStep(3, steps_total, "Allocate new storage")
9821 iv_names = self._CreateNewStorage(self.target_node)
9823 # Step: for each lv, detach+rename*2+attach
9824 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9825 for dev, old_lvs, new_lvs in iv_names.itervalues():
9826 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9828 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9830 result.Raise("Can't detach drbd from local storage on node"
9831 " %s for device %s" % (self.target_node, dev.iv_name))
9833 #cfg.Update(instance)
9835 # ok, we created the new LVs, so now we know we have the needed
9836 # storage; as such, we proceed on the target node to rename
9837 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9838 # using the assumption that logical_id == physical_id (which in
9839 # turn is the unique_id on that node)
9841 # FIXME(iustin): use a better name for the replaced LVs
9842 temp_suffix = int(time.time())
9843 ren_fn = lambda d, suff: (d.physical_id[0],
9844 d.physical_id[1] + "_replaced-%s" % suff)
9846 # Build the rename list based on what LVs exist on the node
9847 rename_old_to_new = []
9848 for to_ren in old_lvs:
9849 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9850 if not result.fail_msg and result.payload:
9852 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9854 self.lu.LogInfo("Renaming the old LVs on the target node")
9855 result = self.rpc.call_blockdev_rename(self.target_node,
9857 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9859 # Now we rename the new LVs to the old LVs
9860 self.lu.LogInfo("Renaming the new LVs on the target node")
9861 rename_new_to_old = [(new, old.physical_id)
9862 for old, new in zip(old_lvs, new_lvs)]
9863 result = self.rpc.call_blockdev_rename(self.target_node,
9865 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9867 # Intermediate steps of in-memory modifications
9868 for old, new in zip(old_lvs, new_lvs):
9869 new.logical_id = old.logical_id
9870 self.cfg.SetDiskID(new, self.target_node)
9872 # We need to modify old_lvs so that removal later removes the
9873 # right LVs, not the newly added ones; note that old_lvs is a copy here
9875 for disk in old_lvs:
9876 disk.logical_id = ren_fn(disk, temp_suffix)
9877 self.cfg.SetDiskID(disk, self.target_node)
9879 # Now that the new lvs have the old name, we can add them to the device
9880 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9881 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9883 msg = result.fail_msg
9885 for new_lv in new_lvs:
9886 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9889 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9890 hint=("cleanup manually the unused logical"
9892 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9895 if self.early_release:
9896 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9898 self._RemoveOldStorage(self.target_node, iv_names)
9899 # WARNING: we release both node locks here, do not do other RPCs
9900 # than WaitForSync to the primary node
9901 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9902 names=[self.target_node, self.other_node])
9905 # This can fail as the old devices are degraded and _WaitForSync
9906 # does a combined result over all disks, so we don't check its return value
9907 self.lu.LogStep(cstep, steps_total, "Sync devices")
9909 _WaitForSync(self.lu, self.instance)
9911 # Check all devices manually
9912 self._CheckDevices(self.instance.primary_node, iv_names)
9914 # Step: remove old storage
9915 if not self.early_release:
9916 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9918 self._RemoveOldStorage(self.target_node, iv_names)
9920 def _ExecDrbd8Secondary(self, feedback_fn):
9921 """Replace the secondary node for DRBD 8.
9923 The algorithm for replace is quite complicated:
9924 - for all disks of the instance:
9925 - create new LVs on the new node with same names
9926 - shutdown the drbd device on the old secondary
9927 - disconnect the drbd network on the primary
9928 - create the drbd device on the new secondary
9929 - network attach the drbd on the primary, using an artifice:
9930 the drbd code for Attach() will connect to the network if it
9931 finds a device which is connected to the good local disks but
9933 - wait for sync across all devices
9934 - remove all disks from the old secondary
9936 Failures are not very well handled.
9941 pnode = self.instance.primary_node
9943 # Step: check device activation
9944 self.lu.LogStep(1, steps_total, "Check device existence")
9945 self._CheckDisksExistence([self.instance.primary_node])
9946 self._CheckVolumeGroup([self.instance.primary_node])
9948 # Step: check other node consistency
9949 self.lu.LogStep(2, steps_total, "Check peer consistency")
9950 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9952 # Step: create new storage
9953 self.lu.LogStep(3, steps_total, "Allocate new storage")
9954 for idx, dev in enumerate(self.instance.disks):
9955 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9956 (self.new_node, idx))
9957 # we pass force_create=True to force LVM creation
9958 for new_lv in dev.children:
9959 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9960 _GetInstanceInfoText(self.instance), False)
9962 # Step 4: drbd minors and drbd setup changes
9963 # after this, we must manually remove the drbd minors on both the
9964 # error and the success paths
9965 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9966 minors = self.cfg.AllocateDRBDMinor([self.new_node
9967 for dev in self.instance.disks],
9969 logging.debug("Allocated minors %r", minors)
9972 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9973 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9974 (self.new_node, idx))
9975 # create new devices on new_node; note that we create two IDs:
9976 # one without port, so the drbd will be activated without
9977 # networking information on the new node at this stage, and one
9978 # with network, for the later activation in step 4
9979 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9980 if self.instance.primary_node == o_node1:
9983 assert self.instance.primary_node == o_node2, "Three-node instance?"
9986 new_alone_id = (self.instance.primary_node, self.new_node, None,
9987 p_minor, new_minor, o_secret)
9988 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9989 p_minor, new_minor, o_secret)
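# Illustrative sketch (values are made up): for a disk whose old logical_id
# was ("nodeA", "nodeB", 11000, 0, 1, "secret"), with nodeA the primary node
# and nodeC the new secondary getting minor 3, the IDs built above would be
#   new_alone_id = ("nodeA", "nodeC", None, 0, 3, "secret")
#   new_net_id = ("nodeA", "nodeC", 11000, 0, 3, "secret")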
9991 iv_names[idx] = (dev, dev.children, new_net_id)
9992 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9994 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9995 logical_id=new_alone_id,
9996 children=dev.children,
9999 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10000 _GetInstanceInfoText(self.instance), False)
10001 except errors.GenericError:
10002 self.cfg.ReleaseDRBDMinors(self.instance.name)
10005 # We have new devices, shutdown the drbd on the old secondary
10006 for idx, dev in enumerate(self.instance.disks):
10007 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10008 self.cfg.SetDiskID(dev, self.target_node)
10009 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10011 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10012 "node: %s" % (idx, msg),
10013 hint=("Please cleanup this device manually as"
10014 " soon as possible"))
10016 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10017 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10018 self.instance.disks)[pnode]
10020 msg = result.fail_msg
10022 # detaches didn't succeed (unlikely)
10023 self.cfg.ReleaseDRBDMinors(self.instance.name)
10024 raise errors.OpExecError("Can't detach the disks from the network on"
10025 " old node: %s" % (msg,))
10027 # if we managed to detach at least one, we update all the disks of
10028 # the instance to point to the new secondary
10029 self.lu.LogInfo("Updating instance configuration")
10030 for dev, _, new_logical_id in iv_names.itervalues():
10031 dev.logical_id = new_logical_id
10032 self.cfg.SetDiskID(dev, self.instance.primary_node)
10034 self.cfg.Update(self.instance, feedback_fn)
10036 # and now perform the drbd attach
10037 self.lu.LogInfo("Attaching primary drbds to new secondary"
10038 " (standalone => connected)")
10039 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10041 self.node_secondary_ip,
10042 self.instance.disks,
10043 self.instance.name,
10045 for to_node, to_result in result.items():
10046 msg = to_result.fail_msg
10048 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10050 hint=("please do a gnt-instance info to see the"
10051 " status of disks"))
10053 if self.early_release:
10054 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10056 self._RemoveOldStorage(self.target_node, iv_names)
10057 # WARNING: we release all node locks here, do not do other RPCs
10058 # than WaitForSync to the primary node
10059 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10060 names=[self.instance.primary_node,
10065 # This can fail as the old devices are degraded and _WaitForSync
10066 # does a combined result over all disks, so we don't check its return value
10067 self.lu.LogStep(cstep, steps_total, "Sync devices")
10069 _WaitForSync(self.lu, self.instance)
10071 # Check all devices manually
10072 self._CheckDevices(self.instance.primary_node, iv_names)
10074 # Step: remove old storage
10075 if not self.early_release:
10076 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10077 self._RemoveOldStorage(self.target_node, iv_names)
10080 class LURepairNodeStorage(NoHooksLU):
10081 """Repairs the volume group on a node.
10086 def CheckArguments(self):
10087 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10089 storage_type = self.op.storage_type
10091 if (constants.SO_FIX_CONSISTENCY not in
10092 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10093 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10094 " repaired" % storage_type,
10095 errors.ECODE_INVAL)
10097 def ExpandNames(self):
10098 self.needed_locks = {
10099 locking.LEVEL_NODE: [self.op.node_name],
10102 def _CheckFaultyDisks(self, instance, node_name):
10103 """Ensure faulty disks abort the opcode or at least warn."""
10105 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10107 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10108 " node '%s'" % (instance.name, node_name),
10109 errors.ECODE_STATE)
10110 except errors.OpPrereqError, err:
10111 if self.op.ignore_consistency:
10112 self.proc.LogWarning(str(err.args[0]))
10116 def CheckPrereq(self):
10117 """Check prerequisites.
10120 # Check whether any instance on this node has faulty disks
10121 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10122 if not inst.admin_up:
10124 check_nodes = set(inst.all_nodes)
10125 check_nodes.discard(self.op.node_name)
10126 for inst_node_name in check_nodes:
10127 self._CheckFaultyDisks(inst, inst_node_name)
10129 def Exec(self, feedback_fn):
10130 feedback_fn("Repairing storage unit '%s' on %s ..." %
10131 (self.op.name, self.op.node_name))
10133 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10134 result = self.rpc.call_storage_execute(self.op.node_name,
10135 self.op.storage_type, st_args,
10137 constants.SO_FIX_CONSISTENCY)
10138 result.Raise("Failed to repair storage unit '%s' on %s" %
10139 (self.op.name, self.op.node_name))
10142 class LUNodeEvacuate(NoHooksLU):
10143 """Evacuates instances off a list of nodes.
10148 _MODE2IALLOCATOR = {
10149 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
10150 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
10151 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
10153 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
10154 assert (frozenset(_MODE2IALLOCATOR.values()) ==
10155 constants.IALLOCATOR_NEVAC_MODES)
10157 def CheckArguments(self):
10158 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10160 def ExpandNames(self):
10161 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10163 if self.op.remote_node is not None:
10164 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10165 assert self.op.remote_node
10167 if self.op.remote_node == self.op.node_name:
10168 raise errors.OpPrereqError("Can not use evacuated node as a new"
10169 " secondary node", errors.ECODE_INVAL)
10171 if self.op.mode != constants.NODE_EVAC_SEC:
10172 raise errors.OpPrereqError("Without the use of an iallocator only"
10173 " secondary instances can be evacuated",
10174 errors.ECODE_INVAL)
10177 self.share_locks = _ShareAll()
10178 self.needed_locks = {
10179 locking.LEVEL_INSTANCE: [],
10180 locking.LEVEL_NODEGROUP: [],
10181 locking.LEVEL_NODE: [],
10184 # Determine nodes (via group) optimistically, needs verification once locks
10185 # have been acquired
10186 self.lock_nodes = self._DetermineNodes()
10188 def _DetermineNodes(self):
10189 """Gets the list of nodes to operate on.
10192 if self.op.remote_node is None:
10193 # Iallocator will choose any node(s) in the same group
10194 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10196 group_nodes = frozenset([self.op.remote_node])
10198 # Determine nodes to be locked
10199 return set([self.op.node_name]) | group_nodes
10201 def _DetermineInstances(self):
10202 """Builds list of instances to operate on.
10205 assert self.op.mode in constants.NODE_EVAC_MODES
10207 if self.op.mode == constants.NODE_EVAC_PRI:
10208 # Primary instances only
10209 inst_fn = _GetNodePrimaryInstances
10210 assert self.op.remote_node is None, \
10211 "Evacuating primary instances requires iallocator"
10212 elif self.op.mode == constants.NODE_EVAC_SEC:
10213 # Secondary instances only
10214 inst_fn = _GetNodeSecondaryInstances
10217 assert self.op.mode == constants.NODE_EVAC_ALL
10218 inst_fn = _GetNodeInstances
10219 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10221 raise errors.OpPrereqError("Due to an issue with the iallocator"
10222 " interface it is not possible to evacuate"
10223 " all instances at once; specify explicitly"
10224 " whether to evacuate primary or secondary"
10226 errors.ECODE_INVAL)
10228 return inst_fn(self.cfg, self.op.node_name)
10230 def DeclareLocks(self, level):
10231 if level == locking.LEVEL_INSTANCE:
10232 # Lock instances optimistically, needs verification once node and group
10233 # locks have been acquired
10234 self.needed_locks[locking.LEVEL_INSTANCE] = \
10235 set(i.name for i in self._DetermineInstances())
10237 elif level == locking.LEVEL_NODEGROUP:
10238 # Lock node groups for all potential target nodes optimistically, needs
10239 # verification once nodes have been acquired
10240 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10241 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10243 elif level == locking.LEVEL_NODE:
10244 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10246 def CheckPrereq(self):
10248 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10249 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10250 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10252 need_nodes = self._DetermineNodes()
10254 if not owned_nodes.issuperset(need_nodes):
10255 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
10256 " locks were acquired, current nodes are"
10257 " are '%s', used to be '%s'; retry the"
10259 (self.op.node_name,
10260 utils.CommaJoin(need_nodes),
10261 utils.CommaJoin(owned_nodes)),
10262 errors.ECODE_STATE)
10264 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10265 if owned_groups != wanted_groups:
10266 raise errors.OpExecError("Node groups changed since locks were acquired,"
10267 " current groups are '%s', used to be '%s';"
10268 " retry the operation" %
10269 (utils.CommaJoin(wanted_groups),
10270 utils.CommaJoin(owned_groups)))
10272 # Determine affected instances
10273 self.instances = self._DetermineInstances()
10274 self.instance_names = [i.name for i in self.instances]
10276 if set(self.instance_names) != owned_instances:
10277 raise errors.OpExecError("Instances on node '%s' changed since locks"
10278 " were acquired, current instances are '%s',"
10279 " used to be '%s'; retry the operation" %
10280 (self.op.node_name,
10281 utils.CommaJoin(self.instance_names),
10282 utils.CommaJoin(owned_instances)))
10284 if self.instance_names:
10285 self.LogInfo("Evacuating instances from node '%s': %s",
10287 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10289 self.LogInfo("No instances to evacuate from node '%s'",
10292 if self.op.remote_node is not None:
10293 for i in self.instances:
10294 if i.primary_node == self.op.remote_node:
10295 raise errors.OpPrereqError("Node %s is the primary node of"
10296 " instance %s, cannot use it as"
10298 (self.op.remote_node, i.name),
10299 errors.ECODE_INVAL)
10301 def Exec(self, feedback_fn):
10302 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10304 if not self.instance_names:
10305 # No instances to evacuate
10308 elif self.op.iallocator is not None:
10309 # TODO: Implement relocation to other group
10310 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10311 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
10312 instances=list(self.instance_names))
10314 ial.Run(self.op.iallocator)
10316 if not ial.success:
10317 raise errors.OpPrereqError("Can't compute node evacuation using"
10318 " iallocator '%s': %s" %
10319 (self.op.iallocator, ial.info),
10320 errors.ECODE_NORES)
10322 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10324 elif self.op.remote_node is not None:
10325 assert self.op.mode == constants.NODE_EVAC_SEC
10327 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10328 remote_node=self.op.remote_node,
10330 mode=constants.REPLACE_DISK_CHG,
10331 early_release=self.op.early_release)]
10332 for instance_name in self.instance_names
10336 raise errors.ProgrammerError("No iallocator or remote node")
10338 return ResultWithJobs(jobs)
10341 def _SetOpEarlyRelease(early_release, op):
10342 """Sets C{early_release} flag on opcodes if available.
10346 op.early_release = early_release
10347 except AttributeError:
10348 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10353 def _NodeEvacDest(use_nodes, group, nodes):
10354 """Returns group or nodes depending on caller's choice.
10358 return utils.CommaJoin(nodes)
10363 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10364 """Unpacks the result of change-group and node-evacuate iallocator requests.
10366 Handles results from the L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10367 L{constants.IALLOCATOR_MODE_CHG_GROUP} iallocator modes.
10369 @type lu: L{LogicalUnit}
10370 @param lu: Logical unit instance
10371 @type alloc_result: tuple/list
10372 @param alloc_result: Result from iallocator
10373 @type early_release: bool
10374 @param early_release: Whether to release locks early if possible
10375 @type use_nodes: bool
10376 @param use_nodes: Whether to display node names instead of groups
10379 (moved, failed, jobs) = alloc_result
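# Illustrative sketch (made-up values) of the tuple unpacked above:
#   moved = [("inst1", "group1", ["node2", "node3"])]
#   failed = [("inst2", "not enough memory on target nodes")]
#   jobs = [[<serialized opcode>, ...], ...]
# each inner list in jobs is converted to opcode objects below and submitted
# as a single job by the caller.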
10382 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
10383 for (name, reason) in failed)
10384 lu.LogWarning("Unable to evacuate instances %s", failreason)
10385 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
10388 lu.LogInfo("Instances to be moved: %s",
10389 utils.CommaJoin("%s (to %s)" %
10390 (name, _NodeEvacDest(use_nodes, group, nodes))
10391 for (name, group, nodes) in moved))
10393 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10394 map(opcodes.OpCode.LoadOpCode, ops))
10398 class LUInstanceGrowDisk(LogicalUnit):
10399 """Grow a disk of an instance.
10402 HPATH = "disk-grow"
10403 HTYPE = constants.HTYPE_INSTANCE
10406 def ExpandNames(self):
10407 self._ExpandAndLockInstance()
10408 self.needed_locks[locking.LEVEL_NODE] = []
10409 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10411 def DeclareLocks(self, level):
10412 if level == locking.LEVEL_NODE:
10413 self._LockInstancesNodes()
10415 def BuildHooksEnv(self):
10416 """Build hooks env.
10418 This runs on the master, the primary and all the secondaries.
10422 "DISK": self.op.disk,
10423 "AMOUNT": self.op.amount,
10425 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10428 def BuildHooksNodes(self):
10429 """Build hooks nodes.
10432 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10435 def CheckPrereq(self):
10436 """Check prerequisites.
10438 This checks that the instance is in the cluster.
10441 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10442 assert instance is not None, \
10443 "Cannot retrieve locked instance %s" % self.op.instance_name
10444 nodenames = list(instance.all_nodes)
10445 for node in nodenames:
10446 _CheckNodeOnline(self, node)
10448 self.instance = instance
10450 if instance.disk_template not in constants.DTS_GROWABLE:
10451 raise errors.OpPrereqError("Instance's disk layout does not support"
10452 " growing", errors.ECODE_INVAL)
10454 self.disk = instance.FindDisk(self.op.disk)
10456 if instance.disk_template not in (constants.DT_FILE,
10457 constants.DT_SHARED_FILE):
10458 # TODO: check the free disk space for file, when that feature will be
10460 _CheckNodesFreeDiskPerVG(self, nodenames,
10461 self.disk.ComputeGrowth(self.op.amount))
10463 def Exec(self, feedback_fn):
10464 """Execute disk grow.
10467 instance = self.instance
10470 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10472 raise errors.OpExecError("Cannot activate block device to grow")
10474 # First run all grow ops in dry-run mode
10475 for node in instance.all_nodes:
10476 self.cfg.SetDiskID(disk, node)
10477 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10478 result.Raise("Grow request failed to node %s" % node)
10480 # We know that (as far as we can test) operations across different
10481 # nodes will succeed, time to run it for real
10482 for node in instance.all_nodes:
10483 self.cfg.SetDiskID(disk, node)
10484 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10485 result.Raise("Grow request failed to node %s" % node)
10487 # TODO: Rewrite code to work properly
10488 # DRBD goes into sync mode for a short amount of time after executing the
10489 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10490 # calling "resize" in sync mode fails. Sleeping for a short amount of
10491 # time is a work-around.
10494 disk.RecordGrow(self.op.amount)
10495 self.cfg.Update(instance, feedback_fn)
10496 if self.op.wait_for_sync:
10497 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10499 self.proc.LogWarning("Disk sync-ing has not returned a good"
10500 " status; please check the instance")
10501 if not instance.admin_up:
10502 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10503 elif not instance.admin_up:
10504 self.proc.LogWarning("Not shutting down the disk even if the instance is"
10505 " not supposed to be running because no wait for"
10506 " sync mode was requested")
10509 class LUInstanceQueryData(NoHooksLU):
10510 """Query runtime instance data.
10515 def ExpandNames(self):
10516 self.needed_locks = {}
10518 # Use locking if requested or when non-static information is wanted
10519 if not (self.op.static or self.op.use_locking):
10520 self.LogWarning("Non-static data requested, locks need to be acquired")
10521 self.op.use_locking = True
10523 if self.op.instances or not self.op.use_locking:
10524 # Expand instance names right here
10525 self.wanted_names = _GetWantedInstances(self, self.op.instances)
10527 # Will use acquired locks
10528 self.wanted_names = None
10530 if self.op.use_locking:
10531 self.share_locks = _ShareAll()
10533 if self.wanted_names is None:
10534 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10536 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10538 self.needed_locks[locking.LEVEL_NODE] = []
10539 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10541 def DeclareLocks(self, level):
10542 if self.op.use_locking and level == locking.LEVEL_NODE:
10543 self._LockInstancesNodes()
10545 def CheckPrereq(self):
10546 """Check prerequisites.
10548 This only checks the optional instance list against the existing names.
10551 if self.wanted_names is None:
10552 assert self.op.use_locking, "Locking was not used"
10553 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10555 self.wanted_instances = \
10556 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10558 def _ComputeBlockdevStatus(self, node, instance_name, dev):
10559 """Returns the status of a block device
10562 if self.op.static or not node:
10565 self.cfg.SetDiskID(dev, node)
10567 result = self.rpc.call_blockdev_find(node, dev)
10571 result.Raise("Can't compute disk status for %s" % instance_name)
10573 status = result.payload
10577 return (status.dev_path, status.major, status.minor,
10578 status.sync_percent, status.estimated_time,
10579 status.is_degraded, status.ldisk_status)
10581 def _ComputeDiskStatus(self, instance, snode, dev):
10582 """Compute block device status.
10585 if dev.dev_type in constants.LDS_DRBD:
10586 # we change the snode then (otherwise we use the one passed in)
10587 if dev.logical_id[0] == instance.primary_node:
10588 snode = dev.logical_id[1]
10590 snode = dev.logical_id[0]
10592 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10593 instance.name, dev)
10594 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10597 dev_children = map(compat.partial(self._ComputeDiskStatus,
10604 "iv_name": dev.iv_name,
10605 "dev_type": dev.dev_type,
10606 "logical_id": dev.logical_id,
10607 "physical_id": dev.physical_id,
10608 "pstatus": dev_pstatus,
10609 "sstatus": dev_sstatus,
10610 "children": dev_children,
10615 def Exec(self, feedback_fn):
10616 """Gather and return data"""
10619 cluster = self.cfg.GetClusterInfo()
10621 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10622 for i in self.wanted_instances)
10623 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10624 if self.op.static or pnode.offline:
10625 remote_state = None
10627 self.LogWarning("Primary node %s is marked offline, returning static"
10628 " information only for instance %s" %
10629 (pnode.name, instance.name))
10631 remote_info = self.rpc.call_instance_info(instance.primary_node,
10633 instance.hypervisor)
10634 remote_info.Raise("Error checking node %s" % instance.primary_node)
10635 remote_info = remote_info.payload
10636 if remote_info and "state" in remote_info:
10637 remote_state = "up"
10639 remote_state = "down"
10641 if instance.admin_up:
10642 config_state = "up"
10644 config_state = "down"
10646 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10649 result[instance.name] = {
10650 "name": instance.name,
10651 "config_state": config_state,
10652 "run_state": remote_state,
10653 "pnode": instance.primary_node,
10654 "snodes": instance.secondary_nodes,
10656 # this happens to be the same format used for hooks
10657 "nics": _NICListToTuple(self, instance.nics),
10658 "disk_template": instance.disk_template,
10660 "hypervisor": instance.hypervisor,
10661 "network_port": instance.network_port,
10662 "hv_instance": instance.hvparams,
10663 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10664 "be_instance": instance.beparams,
10665 "be_actual": cluster.FillBE(instance),
10666 "os_instance": instance.osparams,
10667 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10668 "serial_no": instance.serial_no,
10669 "mtime": instance.mtime,
10670 "ctime": instance.ctime,
10671 "uuid": instance.uuid,
10677 class LUInstanceSetParams(LogicalUnit):
10678 """Modifies an instances's parameters.
10681 HPATH = "instance-modify"
10682 HTYPE = constants.HTYPE_INSTANCE
10685 def CheckArguments(self):
10686 if not (self.op.nics or self.op.disks or self.op.disk_template or
10687 self.op.hvparams or self.op.beparams or self.op.os_name):
10688 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10690 if self.op.hvparams:
10691 _CheckGlobalHvParams(self.op.hvparams)
10695 for disk_op, disk_dict in self.op.disks:
10696 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10697 if disk_op == constants.DDM_REMOVE:
10698 disk_addremove += 1
10700 elif disk_op == constants.DDM_ADD:
10701 disk_addremove += 1
10703 if not isinstance(disk_op, int):
10704 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10705 if not isinstance(disk_dict, dict):
10706 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10707 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10709 if disk_op == constants.DDM_ADD:
10710 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10711 if mode not in constants.DISK_ACCESS_SET:
10712 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10713 errors.ECODE_INVAL)
10714 size = disk_dict.get(constants.IDISK_SIZE, None)
10716 raise errors.OpPrereqError("Required disk parameter size missing",
10717 errors.ECODE_INVAL)
10720 except (TypeError, ValueError), err:
10721 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10722 str(err), errors.ECODE_INVAL)
10723 disk_dict[constants.IDISK_SIZE] = size
10725 # modification of disk
10726 if constants.IDISK_SIZE in disk_dict:
10727 raise errors.OpPrereqError("Disk size change not possible, use"
10728 " grow-disk", errors.ECODE_INVAL)
10730 if disk_addremove > 1:
10731 raise errors.OpPrereqError("Only one disk add or remove operation"
10732 " supported at a time", errors.ECODE_INVAL)
10734 if self.op.disks and self.op.disk_template is not None:
10735 raise errors.OpPrereqError("Disk template conversion and other disk"
10736 " changes not supported at the same time",
10737 errors.ECODE_INVAL)
10739 if (self.op.disk_template and
10740 self.op.disk_template in constants.DTS_INT_MIRROR and
10741 self.op.remote_node is None):
10742 raise errors.OpPrereqError("Changing the disk template to a mirrored"
10743 " one requires specifying a secondary node",
10744 errors.ECODE_INVAL)
10748 for nic_op, nic_dict in self.op.nics:
10749 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10750 if nic_op == constants.DDM_REMOVE:
10753 elif nic_op == constants.DDM_ADD:
10756 if not isinstance(nic_op, int):
10757 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10758 if not isinstance(nic_dict, dict):
10759 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10760 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10762 # nic_dict should be a dict
10763 nic_ip = nic_dict.get(constants.INIC_IP, None)
10764 if nic_ip is not None:
10765 if nic_ip.lower() == constants.VALUE_NONE:
10766 nic_dict[constants.INIC_IP] = None
10768 if not netutils.IPAddress.IsValid(nic_ip):
10769 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10770 errors.ECODE_INVAL)
10772 nic_bridge = nic_dict.get("bridge", None)
10773 nic_link = nic_dict.get(constants.INIC_LINK, None)
10774 if nic_bridge and nic_link:
10775 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10776 " at the same time", errors.ECODE_INVAL)
10777 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10778 nic_dict["bridge"] = None
10779 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10780 nic_dict[constants.INIC_LINK] = None
10782 if nic_op == constants.DDM_ADD:
10783 nic_mac = nic_dict.get(constants.INIC_MAC, None)
10784 if nic_mac is None:
10785 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10787 if constants.INIC_MAC in nic_dict:
10788 nic_mac = nic_dict[constants.INIC_MAC]
10789 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10790 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10792 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10793 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10794 " modifying an existing nic",
10795 errors.ECODE_INVAL)
10797 if nic_addremove > 1:
10798 raise errors.OpPrereqError("Only one NIC add or remove operation"
10799 " supported at a time", errors.ECODE_INVAL)
10801 def ExpandNames(self):
10802 self._ExpandAndLockInstance()
10803 self.needed_locks[locking.LEVEL_NODE] = []
10804 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10806 def DeclareLocks(self, level):
10807 if level == locking.LEVEL_NODE:
10808 self._LockInstancesNodes()
10809 if self.op.disk_template and self.op.remote_node:
10810 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10811 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10813 def BuildHooksEnv(self):
10814 """Build hooks env.
10816 This runs on the master, primary and secondaries.
10820 if constants.BE_MEMORY in self.be_new:
10821 args["memory"] = self.be_new[constants.BE_MEMORY]
10822 if constants.BE_VCPUS in self.be_new:
10823 args["vcpus"] = self.be_new[constants.BE_VCPUS]
10824 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10825 # information at all.
10828 nic_override = dict(self.op.nics)
10829 for idx, nic in enumerate(self.instance.nics):
10830 if idx in nic_override:
10831 this_nic_override = nic_override[idx]
10833 this_nic_override = {}
10834 if constants.INIC_IP in this_nic_override:
10835 ip = this_nic_override[constants.INIC_IP]
10838 if constants.INIC_MAC in this_nic_override:
10839 mac = this_nic_override[constants.INIC_MAC]
10842 if idx in self.nic_pnew:
10843 nicparams = self.nic_pnew[idx]
10845 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10846 mode = nicparams[constants.NIC_MODE]
10847 link = nicparams[constants.NIC_LINK]
10848 args["nics"].append((ip, mac, mode, link))
10849 if constants.DDM_ADD in nic_override:
10850 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10851 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10852 nicparams = self.nic_pnew[constants.DDM_ADD]
10853 mode = nicparams[constants.NIC_MODE]
10854 link = nicparams[constants.NIC_LINK]
10855 args["nics"].append((ip, mac, mode, link))
10856 elif constants.DDM_REMOVE in nic_override:
10857 del args["nics"][-1]
10859 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10860 if self.op.disk_template:
10861 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10865 def BuildHooksNodes(self):
10866 """Build hooks nodes.
10869 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10872 def CheckPrereq(self):
10873 """Check prerequisites.
10875 This only checks the instance list against the existing names.
10878 # checking the new params on the primary/secondary nodes
10880 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10881 cluster = self.cluster = self.cfg.GetClusterInfo()
10882 assert self.instance is not None, \
10883 "Cannot retrieve locked instance %s" % self.op.instance_name
10884 pnode = instance.primary_node
10885 nodelist = list(instance.all_nodes)
10888 if self.op.os_name and not self.op.force:
10889 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10890 self.op.force_variant)
10891 instance_os = self.op.os_name
10893 instance_os = instance.os
10895 if self.op.disk_template:
10896 if instance.disk_template == self.op.disk_template:
10897 raise errors.OpPrereqError("Instance already has disk template %s" %
10898 instance.disk_template, errors.ECODE_INVAL)
10900 if (instance.disk_template,
10901 self.op.disk_template) not in self._DISK_CONVERSIONS:
10902 raise errors.OpPrereqError("Unsupported disk template conversion from"
10903 " %s to %s" % (instance.disk_template,
10904 self.op.disk_template),
10905 errors.ECODE_INVAL)
10906 _CheckInstanceDown(self, instance, "cannot change disk template")
10907 if self.op.disk_template in constants.DTS_INT_MIRROR:
10908 if self.op.remote_node == pnode:
10909 raise errors.OpPrereqError("Given new secondary node %s is the same"
10910 " as the primary node of the instance" %
10911 self.op.remote_node, errors.ECODE_STATE)
10912 _CheckNodeOnline(self, self.op.remote_node)
10913 _CheckNodeNotDrained(self, self.op.remote_node)
10914 # FIXME: here we assume that the old disk template is DT_PLAIN
10915 assert instance.disk_template == constants.DT_PLAIN
10916 disks = [{constants.IDISK_SIZE: d.size,
10917 constants.IDISK_VG: d.logical_id[0]}
10918 for d in instance.disks]
10919 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10920 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10922 # hvparams processing
10923 if self.op.hvparams:
10924 hv_type = instance.hypervisor
10925 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10926 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10927 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10930 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10931 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10932 self.hv_new = hv_new # the new actual values
10933 self.hv_inst = i_hvdict # the new dict (without defaults)
10935 self.hv_new = self.hv_inst = {}
10937 # beparams processing
10938 if self.op.beparams:
10939 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10941 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10942 be_new = cluster.SimpleFillBE(i_bedict)
10943 self.be_new = be_new # the new actual values
10944 self.be_inst = i_bedict # the new dict (without defaults)
10946 self.be_new = self.be_inst = {}
10947 be_old = cluster.FillBE(instance)
10949 # osparams processing
10950 if self.op.osparams:
10951 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10952 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10953 self.os_inst = i_osdict # the new dict (without defaults)
10959 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10960 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10961 mem_check_list = [pnode]
10962 if be_new[constants.BE_AUTO_BALANCE]:
10963 # either we changed auto_balance to yes or it was from before
10964 mem_check_list.extend(instance.secondary_nodes)
10965 instance_info = self.rpc.call_instance_info(pnode, instance.name,
10966 instance.hypervisor)
10967 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10968 instance.hypervisor)
10969 pninfo = nodeinfo[pnode]
10970 msg = pninfo.fail_msg
10972 # Assume the primary node is unreachable and go ahead
10973 self.warn.append("Can't get info from primary node %s: %s" %
10975 elif not isinstance(pninfo.payload.get("memory_free", None), int):
10976 self.warn.append("Node data from primary node %s doesn't contain"
10977 " free memory information" % pnode)
10978 elif instance_info.fail_msg:
10979 self.warn.append("Can't get instance runtime information: %s" %
10980 instance_info.fail_msg)
10982 if instance_info.payload:
10983 current_mem = int(instance_info.payload["memory"])
10985 # Assume instance not running
10986 # (there is a slight race condition here, but it's not very probable,
10987 # and we have no other way to check)
10989 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10990 pninfo.payload["memory_free"])
10992 raise errors.OpPrereqError("This change will prevent the instance"
10993 " from starting, due to %d MB of memory"
10994 " missing on its primary node" % miss_mem,
10995 errors.ECODE_NORES)
10997 if be_new[constants.BE_AUTO_BALANCE]:
10998 for node, nres in nodeinfo.items():
10999 if node not in instance.secondary_nodes:
11001 nres.Raise("Can't get info from secondary node %s" % node,
11002 prereq=True, ecode=errors.ECODE_STATE)
11003 if not isinstance(nres.payload.get("memory_free", None), int):
11004 raise errors.OpPrereqError("Secondary node %s didn't return free"
11005 " memory information" % node,
11006 errors.ECODE_STATE)
11007 elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11008 raise errors.OpPrereqError("This change will prevent the instance"
11009 " from failover to its secondary node"
11010 " %s, due to not enough memory" % node,
11011 errors.ECODE_STATE)
11015 self.nic_pinst = {}
11016 for nic_op, nic_dict in self.op.nics:
11017 if nic_op == constants.DDM_REMOVE:
11018 if not instance.nics:
11019 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11020 errors.ECODE_INVAL)
11022 if nic_op != constants.DDM_ADD:
11024 if not instance.nics:
11025 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11026 " no NICs" % nic_op,
11027 errors.ECODE_INVAL)
11028 if nic_op < 0 or nic_op >= len(instance.nics):
11029 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11031 (nic_op, len(instance.nics) - 1),
11032 errors.ECODE_INVAL)
11033 old_nic_params = instance.nics[nic_op].nicparams
11034 old_nic_ip = instance.nics[nic_op].ip
11036 old_nic_params = {}
11039 update_params_dict = dict([(key, nic_dict[key])
11040 for key in constants.NICS_PARAMETERS
11041 if key in nic_dict])
11043 if "bridge" in nic_dict:
11044 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11046 new_nic_params = _GetUpdatedParams(old_nic_params,
11047 update_params_dict)
11048 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11049 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11050 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11051 self.nic_pinst[nic_op] = new_nic_params
11052 self.nic_pnew[nic_op] = new_filled_nic_params
11053 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11055 if new_nic_mode == constants.NIC_MODE_BRIDGED:
11056 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11057 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11059 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11061 self.warn.append(msg)
11063 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11064 if new_nic_mode == constants.NIC_MODE_ROUTED:
11065 if constants.INIC_IP in nic_dict:
11066 nic_ip = nic_dict[constants.INIC_IP]
11068 nic_ip = old_nic_ip
11070 raise errors.OpPrereqError("Cannot set the nic ip to None"
11071 " on a routed nic", errors.ECODE_INVAL)
11072 if constants.INIC_MAC in nic_dict:
11073 nic_mac = nic_dict[constants.INIC_MAC]
11074 if nic_mac is None:
11075 raise errors.OpPrereqError("Cannot set the nic mac to None",
11076 errors.ECODE_INVAL)
11077 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11078 # otherwise generate the mac
11079 nic_dict[constants.INIC_MAC] = \
11080 self.cfg.GenerateMAC(self.proc.GetECId())
11081 else:
11082 # or validate/reserve the current one
11083 try:
11084 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11085 except errors.ReservationError:
11086 raise errors.OpPrereqError("MAC address %s already in use"
11087 " in cluster" % nic_mac,
11088 errors.ECODE_NOTUNIQUE)
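# Disk changes: self.op.disks is a list of (disk_op, disk_dict) pairs,
# where disk_op is DDM_ADD, DDM_REMOVE or the index of an existing disk;
# only indices and limits are validated here, the actual modifications are
# applied in Exec().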
11091 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11092 raise errors.OpPrereqError("Disk operations not supported for"
11093 " diskless instances",
11094 errors.ECODE_INVAL)
11095 for disk_op, _ in self.op.disks:
11096 if disk_op == constants.DDM_REMOVE:
11097 if len(instance.disks) == 1:
11098 raise errors.OpPrereqError("Cannot remove the last disk of"
11099 " an instance", errors.ECODE_INVAL)
11100 _CheckInstanceDown(self, instance, "cannot remove disks")
11102 if (disk_op == constants.DDM_ADD and
11103 len(instance.disks) >= constants.MAX_DISKS):
11104 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11105 " add more" % constants.MAX_DISKS,
11106 errors.ECODE_STATE)
11107 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11109 if disk_op < 0 or disk_op >= len(instance.disks):
11110 raise errors.OpPrereqError("Invalid disk index %s, valid values"
11111 " are 0 to %d" %
11112 (disk_op, len(instance.disks)),
11113 errors.ECODE_INVAL)
11117 def _ConvertPlainToDrbd(self, feedback_fn):
11118 """Converts an instance from plain to drbd.
11121 feedback_fn("Converting template to drbd")
11122 instance = self.instance
11123 pnode = instance.primary_node
11124 snode = self.op.remote_node
11126 # create a fake disk info for _GenerateDiskTemplate
11127 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11128 constants.IDISK_VG: d.logical_id[0]}
11129 for d in instance.disks]
11130 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11131 instance.name, pnode, [snode],
11132 disk_info, None, None, 0, feedback_fn)
11133 info = _GetInstanceInfoText(instance)
11134 feedback_fn("Creating additional volumes...")
11135 # first, create the missing data and meta devices
11136 for disk in new_disks:
11137 # unfortunately this is... not too nice
11138 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11139 info, True)
11140 for child in disk.children:
11141 _CreateSingleBlockDev(self, snode, instance, child, info, True)
11142 # at this stage, all new LVs have been created, we can rename the
11143 # old ones
11144 feedback_fn("Renaming original volumes...")
11145 rename_list = [(o, n.children[0].logical_id)
11146 for (o, n) in zip(instance.disks, new_disks)]
11147 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11148 result.Raise("Failed to rename original LVs")
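# After the rename the original LVs carry the names expected for the data
# children (children[0]) of the new DRBD disks, so the DRBD devices created
# below are assembled on top of the existing data without copying it.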
11150 feedback_fn("Initializing DRBD devices...")
11151 # all child devices are in place, we can now create the DRBD devices
11152 for disk in new_disks:
11153 for node in [pnode, snode]:
11154 f_create = node == pnode
11155 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11157 # at this point, the instance has been modified
11158 instance.disk_template = constants.DT_DRBD8
11159 instance.disks = new_disks
11160 self.cfg.Update(instance, feedback_fn)
11162 # disks are created, waiting for sync
11163 disk_abort = not _WaitForSync(self, instance,
11164 oneshot=not self.op.wait_for_sync)
11165 if disk_abort:
11166 raise errors.OpExecError("There are some degraded disks for"
11167 " this instance, please cleanup manually")
11169 def _ConvertDrbdToPlain(self, feedback_fn):
11170 """Converts an instance from drbd to plain.
11173 instance = self.instance
11174 assert len(instance.secondary_nodes) == 1
11175 pnode = instance.primary_node
11176 snode = instance.secondary_nodes[0]
11177 feedback_fn("Converting template to plain")
11179 old_disks = instance.disks
11180 new_disks = [d.children[0] for d in old_disks]
11182 # copy over size and mode
11183 for parent, child in zip(old_disks, new_disks):
11184 child.size = parent.size
11185 child.mode = parent.mode
11187 # update instance structure
11188 instance.disks = new_disks
11189 instance.disk_template = constants.DT_PLAIN
11190 self.cfg.Update(instance, feedback_fn)
11192 feedback_fn("Removing volumes on the secondary node...")
11193 for disk in old_disks:
11194 self.cfg.SetDiskID(disk, snode)
11195 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11196 if msg:
11197 self.LogWarning("Could not remove block device %s on node %s,"
11198 " continuing anyway: %s", disk.iv_name, snode, msg)
11200 feedback_fn("Removing unneeded volumes on the primary node...")
11201 for idx, disk in enumerate(old_disks):
11202 meta = disk.children[1]
11203 self.cfg.SetDiskID(meta, pnode)
11204 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11205 if msg:
11206 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11207 " continuing anyway: %s", idx, pnode, msg)
11209 # this is a DRBD disk, return its port to the pool
11210 for disk in old_disks:
11211 tcp_port = disk.logical_id[2]
11212 self.cfg.AddTcpUdpPort(tcp_port)
11214 def Exec(self, feedback_fn):
11215 """Modifies an instance.
11217 All parameters take effect only at the next restart of the instance.
11220 # Process here the warnings from CheckPrereq, as we don't have a
11221 # feedback_fn there.
11222 for warn in self.warn:
11223 feedback_fn("WARNING: %s" % warn)
11225 result = []
11226 instance = self.instance
11228 for disk_op, disk_dict in self.op.disks:
11229 if disk_op == constants.DDM_REMOVE:
11230 # remove the last disk
11231 device = instance.disks.pop()
11232 device_idx = len(instance.disks)
11233 for node, disk in device.ComputeNodeTree(instance.primary_node):
11234 self.cfg.SetDiskID(disk, node)
11235 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11236 if msg:
11237 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11238 " continuing anyway", device_idx, node, msg)
11239 result.append(("disk/%d" % device_idx, "remove"))
11241 # if this is a DRBD disk, return its port to the pool
11242 if device.dev_type in constants.LDS_DRBD:
11243 tcp_port = device.logical_id[2]
11244 self.cfg.AddTcpUdpPort(tcp_port)
11245 elif disk_op == constants.DDM_ADD:
11247 if instance.disk_template in (constants.DT_FILE,
11248 constants.DT_SHARED_FILE):
11249 file_driver, file_path = instance.disks[0].logical_id
11250 file_path = os.path.dirname(file_path)
11251 else:
11252 file_driver = file_path = None
11253 disk_idx_base = len(instance.disks)
11254 new_disk = _GenerateDiskTemplate(self,
11255 instance.disk_template,
11256 instance.name, instance.primary_node,
11257 instance.secondary_nodes,
11258 [disk_dict],
11259 file_path,
11260 file_driver,
11261 disk_idx_base, feedback_fn)[0]
11262 instance.disks.append(new_disk)
11263 info = _GetInstanceInfoText(instance)
11265 logging.info("Creating volume %s for instance %s",
11266 new_disk.iv_name, instance.name)
11267 # Note: this needs to be kept in sync with _CreateDisks
11269 for node in instance.all_nodes:
11270 f_create = node == instance.primary_node
11271 try:
11272 _CreateBlockDev(self, node, instance, new_disk,
11273 f_create, info, f_create)
11274 except errors.OpExecError, err:
11275 self.LogWarning("Failed to create volume %s (%s) on"
11276 " node %s: %s",
11277 new_disk.iv_name, new_disk, node, err)
11278 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11279 (new_disk.size, new_disk.mode)))
11280 else:
11281 # change a given disk
11282 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11283 result.append(("disk.mode/%d" % disk_op,
11284 disk_dict[constants.IDISK_MODE]))
11286 if self.op.disk_template:
11287 r_shut = _ShutdownInstanceDisks(self, instance)
11288 if not r_shut:
11289 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11290 " proceed with disk template conversion")
11291 mode = (instance.disk_template, self.op.disk_template)
11292 try:
11293 self._DISK_CONVERSIONS[mode](self, feedback_fn)
11294 except:
11295 self.cfg.ReleaseDRBDMinors(instance.name)
11296 raise
11297 result.append(("disk_template", self.op.disk_template))
11300 for nic_op, nic_dict in self.op.nics:
11301 if nic_op == constants.DDM_REMOVE:
11302 # remove the last nic
11303 del instance.nics[-1]
11304 result.append(("nic.%d" % len(instance.nics), "remove"))
11305 elif nic_op == constants.DDM_ADD:
11306 # mac and bridge should be set, by now
11307 mac = nic_dict[constants.INIC_MAC]
11308 ip = nic_dict.get(constants.INIC_IP, None)
11309 nicparams = self.nic_pinst[constants.DDM_ADD]
11310 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11311 instance.nics.append(new_nic)
11312 result.append(("nic.%d" % (len(instance.nics) - 1),
11313 "add:mac=%s,ip=%s,mode=%s,link=%s" %
11314 (new_nic.mac, new_nic.ip,
11315 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11316 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11317 )))
11318 else:
11319 for key in (constants.INIC_MAC, constants.INIC_IP):
11320 if key in nic_dict:
11321 setattr(instance.nics[nic_op], key, nic_dict[key])
11322 if nic_op in self.nic_pinst:
11323 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11324 for key, val in nic_dict.iteritems():
11325 result.append(("nic.%s/%d" % (key, nic_op), val))
11328 if self.op.hvparams:
11329 instance.hvparams = self.hv_inst
11330 for key, val in self.op.hvparams.iteritems():
11331 result.append(("hv/%s" % key, val))
11334 if self.op.beparams:
11335 instance.beparams = self.be_inst
11336 for key, val in self.op.beparams.iteritems():
11337 result.append(("be/%s" % key, val))
11340 if self.op.os_name:
11341 instance.os = self.op.os_name
11344 if self.op.osparams:
11345 instance.osparams = self.os_inst
11346 for key, val in self.op.osparams.iteritems():
11347 result.append(("os/%s" % key, val))
11349 self.cfg.Update(instance, feedback_fn)
11351 return result
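# Dispatch table used by Exec() above: maps (current template, requested
# template) pairs to the conversion helpers defined earlier in this class.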
11353 _DISK_CONVERSIONS = {
11354 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11355 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11356 }
11359 class LUInstanceChangeGroup(LogicalUnit):
11360 HPATH = "instance-change-group"
11361 HTYPE = constants.HTYPE_INSTANCE
11364 def ExpandNames(self):
11365 self.share_locks = _ShareAll()
11366 self.needed_locks = {
11367 locking.LEVEL_NODEGROUP: [],
11368 locking.LEVEL_NODE: [],
11369 }
11371 self._ExpandAndLockInstance()
11373 if self.op.target_groups:
11374 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11375 self.op.target_groups)
11376 else:
11377 self.req_target_uuids = None
11379 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11381 def DeclareLocks(self, level):
11382 if level == locking.LEVEL_NODEGROUP:
11383 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11385 if self.req_target_uuids:
11386 lock_groups = set(self.req_target_uuids)
11388 # Lock all groups used by instance optimistically; this requires going
11389 # via the node before it's locked, requiring verification later on
11390 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11391 lock_groups.update(instance_groups)
11392 else:
11393 # No target groups, need to lock all of them
11394 lock_groups = locking.ALL_SET
11396 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11398 elif level == locking.LEVEL_NODE:
11399 if self.req_target_uuids:
11400 # Lock all nodes used by instances
11401 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11402 self._LockInstancesNodes()
11404 # Lock all nodes in all potential target groups
11405 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11406 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11407 member_nodes = [node_name
11408 for group in lock_groups
11409 for node_name in self.cfg.GetNodeGroup(group).members]
11410 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11411 else:
11412 # Lock all nodes as all groups are potential targets
11413 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11415 def CheckPrereq(self):
11416 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11417 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11418 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11420 assert (self.req_target_uuids is None or
11421 owned_groups.issuperset(self.req_target_uuids))
11422 assert owned_instances == set([self.op.instance_name])
11424 # Get instance information
11425 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11427 # Check if node groups for locked instance are still correct
11428 assert owned_nodes.issuperset(self.instance.all_nodes), \
11429 ("Instance %s's nodes changed while we kept the lock" %
11430 self.op.instance_name)
11432 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11433 owned_groups)
11435 if self.req_target_uuids:
11436 # User requested specific target groups
11437 self.target_uuids = self.req_target_uuids
11438 else:
11439 # All groups except those used by the instance are potential targets
11440 self.target_uuids = owned_groups - inst_groups
11440 self.target_uuids = owned_groups - inst_groups
11442 conflicting_groups = self.target_uuids & inst_groups
11443 if conflicting_groups:
11444 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11445 " used by the instance '%s'" %
11446 (utils.CommaJoin(conflicting_groups),
11447 self.op.instance_name),
11448 errors.ECODE_INVAL)
11450 if not self.target_uuids:
11451 raise errors.OpPrereqError("There are no possible target groups",
11452 errors.ECODE_INVAL)
11454 def BuildHooksEnv(self):
11455 """Build hooks env.
11458 assert self.target_uuids
11460 env = {
11461 "TARGET_GROUPS": " ".join(self.target_uuids),
11462 }
11464 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11466 return env
11468 def BuildHooksNodes(self):
11469 """Build hooks nodes.
11472 mn = self.cfg.GetMasterNode()
11473 return ([mn], [mn])
11475 def Exec(self, feedback_fn):
11476 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11478 assert instances == [self.op.instance_name], "Instance not locked"
11480 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11481 instances=instances, target_groups=list(self.target_uuids))
11483 ial.Run(self.op.iallocator)
11485 if not ial.success:
11486 raise errors.OpPrereqError("Can't compute solution for changing group of"
11487 " instance '%s' using iallocator '%s': %s" %
11488 (self.op.instance_name, self.op.iallocator,
11489 ial.info),
11490 errors.ECODE_NORES)
11492 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11494 self.LogInfo("Iallocator returned %s job(s) for changing group of"
11495 " instance '%s'", len(jobs), self.op.instance_name)
11497 return ResultWithJobs(jobs)
11500 class LUBackupQuery(NoHooksLU):
11501 """Query the exports list
11506 def ExpandNames(self):
11507 self.needed_locks = {}
11508 self.share_locks[locking.LEVEL_NODE] = 1
11509 if not self.op.nodes:
11510 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11511 else:
11512 self.needed_locks[locking.LEVEL_NODE] = \
11513 _GetWantedNodes(self, self.op.nodes)
11515 def Exec(self, feedback_fn):
11516 """Compute the list of all the exported system images.
11519 @return: a dictionary with the structure node->(export-list)
11520 where export-list is a list of the instances exported on
11524 self.nodes = self.owned_locks(locking.LEVEL_NODE)
11525 rpcresult = self.rpc.call_export_list(self.nodes)
11526 result = {}
11527 for node in rpcresult:
11528 if rpcresult[node].fail_msg:
11529 result[node] = False
11530 else:
11531 result[node] = rpcresult[node].payload
11533 return result
11536 class LUBackupPrepare(NoHooksLU):
11537 """Prepares an instance for an export and returns useful information.
11542 def ExpandNames(self):
11543 self._ExpandAndLockInstance()
11545 def CheckPrereq(self):
11546 """Check prerequisites.
11549 instance_name = self.op.instance_name
11551 self.instance = self.cfg.GetInstanceInfo(instance_name)
11552 assert self.instance is not None, \
11553 "Cannot retrieve locked instance %s" % self.op.instance_name
11554 _CheckNodeOnline(self, self.instance.primary_node)
11556 self._cds = _GetClusterDomainSecret()
11558 def Exec(self, feedback_fn):
11559 """Prepares an instance for an export.
11562 instance = self.instance
11564 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11565 salt = utils.GenerateSecret(8)
11567 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11568 result = self.rpc.call_x509_cert_create(instance.primary_node,
11569 constants.RIE_CERT_VALIDITY)
11570 result.Raise("Can't create X509 key and certificate on %s" % result.node)
11572 (name, cert_pem) = result.payload
11574 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11575 cert_pem)
11577 return {
11578 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11579 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11580 salt),
11581 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11582 }
11584 return None
11587 class LUBackupExport(LogicalUnit):
11588 """Export an instance to an image in the cluster.
11591 HPATH = "instance-export"
11592 HTYPE = constants.HTYPE_INSTANCE
11595 def CheckArguments(self):
11596 """Check the arguments.
11599 self.x509_key_name = self.op.x509_key_name
11600 self.dest_x509_ca_pem = self.op.destination_x509_ca
11602 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11603 if not self.x509_key_name:
11604 raise errors.OpPrereqError("Missing X509 key name for encryption",
11605 errors.ECODE_INVAL)
11607 if not self.dest_x509_ca_pem:
11608 raise errors.OpPrereqError("Missing destination X509 CA",
11609 errors.ECODE_INVAL)
11611 def ExpandNames(self):
11612 self._ExpandAndLockInstance()
11614 # Lock all nodes for local exports
11615 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11616 # FIXME: lock only instance primary and destination node
11618 # Sad but true, for now we have to lock all nodes, as we don't know where
11619 # the previous export might be, and in this LU we search for it and
11620 # remove it from its current node. In the future we could fix this by:
11621 # - making a tasklet to search (share-lock all), then create the
11622 # new one, then one to remove, after
11623 # - removing the removal operation altogether
11624 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11626 def DeclareLocks(self, level):
11627 """Last minute lock declaration."""
11628 # All nodes are locked anyway, so nothing to do here.
11630 def BuildHooksEnv(self):
11631 """Build hooks env.
11633 This will run on the master, primary node and target node.
11636 env = {
11637 "EXPORT_MODE": self.op.mode,
11638 "EXPORT_NODE": self.op.target_node,
11639 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11640 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11641 # TODO: Generic function for boolean env variables
11642 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11643 }
11645 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11647 return env
11649 def BuildHooksNodes(self):
11650 """Build hooks nodes.
11653 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11655 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11656 nl.append(self.op.target_node)
11658 return (nl, nl)
11660 def CheckPrereq(self):
11661 """Check prerequisites.
11663 This checks that the instance and node names are valid.
11666 instance_name = self.op.instance_name
11668 self.instance = self.cfg.GetInstanceInfo(instance_name)
11669 assert self.instance is not None, \
11670 "Cannot retrieve locked instance %s" % self.op.instance_name
11671 _CheckNodeOnline(self, self.instance.primary_node)
11673 if (self.op.remove_instance and self.instance.admin_up and
11674 not self.op.shutdown):
11675 raise errors.OpPrereqError("Can not remove instance without shutting it"
11676 " down before")
11678 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11679 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11680 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11681 assert self.dst_node is not None
11683 _CheckNodeOnline(self, self.dst_node.name)
11684 _CheckNodeNotDrained(self, self.dst_node.name)
11686 self._cds = None
11687 self.dest_disk_info = None
11688 self.dest_x509_ca = None
11690 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11691 self.dst_node = None
11693 if len(self.op.target_node) != len(self.instance.disks):
11694 raise errors.OpPrereqError(("Received destination information for %s"
11695 " disks, but instance %s has %s disks") %
11696 (len(self.op.target_node), instance_name,
11697 len(self.instance.disks)),
11698 errors.ECODE_INVAL)
11700 cds = _GetClusterDomainSecret()
11702 # Check X509 key name
11703 try:
11704 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11705 except (TypeError, ValueError), err:
11706 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11708 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11709 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11710 errors.ECODE_INVAL)
11712 # Load and verify CA
11713 try:
11714 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11715 except OpenSSL.crypto.Error, err:
11716 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11717 (err, ), errors.ECODE_INVAL)
11719 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11720 if errcode is not None:
11721 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11722 (msg, ), errors.ECODE_INVAL)
11724 self.dest_x509_ca = cert
11726 # Verify target information
11727 disk_info = []
11728 for idx, disk_data in enumerate(self.op.target_node):
11729 try:
11730 (host, port, magic) = \
11731 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11732 except errors.GenericError, err:
11733 raise errors.OpPrereqError("Target info for disk %s: %s" %
11734 (idx, err), errors.ECODE_INVAL)
11736 disk_info.append((host, port, magic))
11738 assert len(disk_info) == len(self.op.target_node)
11739 self.dest_disk_info = disk_info
11741 else:
11742 raise errors.ProgrammerError("Unhandled export mode %r" %
11743 self.op.mode)
11745 # instance disk type verification
11746 # TODO: Implement export support for file-based disks
11747 for disk in self.instance.disks:
11748 if disk.dev_type == constants.LD_FILE:
11749 raise errors.OpPrereqError("Export not supported for instances with"
11750 " file-based disks", errors.ECODE_INVAL)
11752 def _CleanupExports(self, feedback_fn):
11753 """Removes exports of current instance from all other nodes.
11755 If an instance in a cluster with nodes A..D was exported to node C, its
11756 exports will be removed from the nodes A, B and D.
11759 assert self.op.mode != constants.EXPORT_MODE_REMOTE
11761 nodelist = self.cfg.GetNodeList()
11762 nodelist.remove(self.dst_node.name)
11764 # on one-node clusters nodelist will be empty after the removal
11765 # if we proceed the backup would be removed because OpBackupQuery
11766 # substitutes an empty list with the full cluster node list.
11767 iname = self.instance.name
11768 if nodelist:
11769 feedback_fn("Removing old exports for instance %s" % iname)
11770 exportlist = self.rpc.call_export_list(nodelist)
11771 for node in exportlist:
11772 if exportlist[node].fail_msg:
11773 continue
11774 if iname in exportlist[node].payload:
11775 msg = self.rpc.call_export_remove(node, iname).fail_msg
11776 if msg:
11777 self.LogWarning("Could not remove older export for instance %s"
11778 " on node %s: %s", iname, node, msg)
11780 def Exec(self, feedback_fn):
11781 """Export an instance to an image in the cluster.
11784 assert self.op.mode in constants.EXPORT_MODES
11786 instance = self.instance
11787 src_node = instance.primary_node
11789 if self.op.shutdown:
11790 # shutdown the instance, but not the disks
11791 feedback_fn("Shutting down instance %s" % instance.name)
11792 result = self.rpc.call_instance_shutdown(src_node, instance,
11793 self.op.shutdown_timeout)
11794 # TODO: Maybe ignore failures if ignore_remove_failures is set
11795 result.Raise("Could not shutdown instance %s on"
11796 " node %s" % (instance.name, src_node))
11798 # set the disks ID correctly since call_instance_start needs the
11799 # correct drbd minor to create the symlinks
11800 for disk in instance.disks:
11801 self.cfg.SetDiskID(disk, src_node)
11803 activate_disks = (not instance.admin_up)
11805 if activate_disks:
11806 # Activate the instance disks if we're exporting a stopped instance
11807 feedback_fn("Activating disks for %s" % instance.name)
11808 _StartInstanceDisks(self, instance, None)
11810 try:
11811 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11812 instance)
11814 helper.CreateSnapshots()
11815 try:
11816 if (self.op.shutdown and instance.admin_up and
11817 not self.op.remove_instance):
11818 assert not activate_disks
11819 feedback_fn("Starting instance %s" % instance.name)
11820 result = self.rpc.call_instance_start(src_node, instance,
11822 msg = result.fail_msg
11823 if msg:
11824 feedback_fn("Failed to start instance: %s" % msg)
11825 _ShutdownInstanceDisks(self, instance)
11826 raise errors.OpExecError("Could not start instance: %s" % msg)
11828 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11829 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11830 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11831 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11832 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11834 (key_name, _, _) = self.x509_key_name
11836 dest_ca_pem = \
11837 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11838 self.dest_x509_ca)
11840 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11841 key_name, dest_ca_pem,
11842 timeouts)
11843 finally:
11844 helper.Cleanup()
11846 # Check for backwards compatibility
11847 assert len(dresults) == len(instance.disks)
11848 assert compat.all(isinstance(i, bool) for i in dresults), \
11849 "Not all results are boolean: %r" % dresults
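# dresults holds one boolean per instance disk (success of that disk's
# export); fin_resu reports whether export finalization succeeded. Both are
# checked below and turned into a single OpExecError on failure.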
11851 if activate_disks:
11853 feedback_fn("Deactivating disks for %s" % instance.name)
11854 _ShutdownInstanceDisks(self, instance)
11856 if not (compat.all(dresults) and fin_resu):
11857 failures = []
11858 if not fin_resu:
11859 failures.append("export finalization")
11860 if not compat.all(dresults):
11861 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11862 if not dsk)
11863 failures.append("disk export: disk(s) %s" % fdsk)
11865 raise errors.OpExecError("Export failed, errors in %s" %
11866 utils.CommaJoin(failures))
11868 # At this point, the export was successful, we can cleanup/finish
11870 # Remove instance if requested
11871 if self.op.remove_instance:
11872 feedback_fn("Removing instance %s" % instance.name)
11873 _RemoveInstance(self, feedback_fn, instance,
11874 self.op.ignore_remove_failures)
11876 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11877 self._CleanupExports(feedback_fn)
11879 return fin_resu, dresults
11882 class LUBackupRemove(NoHooksLU):
11883 """Remove exports related to the named instance.
11888 def ExpandNames(self):
11889 self.needed_locks = {}
11890 # We need all nodes to be locked in order for RemoveExport to work, but we
11891 # don't need to lock the instance itself, as nothing will happen to it (and
11892 # we can remove exports also for a removed instance)
11893 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11895 def Exec(self, feedback_fn):
11896 """Remove any export.
11899 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11900 # If the instance was not found we'll try with the name that was passed in.
11901 # This will only work if it was an FQDN, though.
11902 fqdn_warn = False
11903 if not instance_name:
11904 fqdn_warn = True
11905 instance_name = self.op.instance_name
11907 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11908 exportlist = self.rpc.call_export_list(locked_nodes)
11909 found = False
11910 for node in exportlist:
11911 msg = exportlist[node].fail_msg
11912 if msg:
11913 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11914 continue
11915 if instance_name in exportlist[node].payload:
11916 found = True
11917 result = self.rpc.call_export_remove(node, instance_name)
11918 msg = result.fail_msg
11919 if msg:
11920 logging.error("Could not remove export for instance %s"
11921 " on node %s: %s", instance_name, node, msg)
11923 if fqdn_warn and not found:
11924 feedback_fn("Export not found. If trying to remove an export belonging"
11925 " to a deleted instance please use its Fully Qualified"
11926 " Domain Name.")
11929 class LUGroupAdd(LogicalUnit):
11930 """Logical unit for creating node groups.
11933 HPATH = "group-add"
11934 HTYPE = constants.HTYPE_GROUP
11937 def ExpandNames(self):
11938 # We need the new group's UUID here so that we can create and acquire the
11939 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11940 # that it should not check whether the UUID exists in the configuration.
11941 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11942 self.needed_locks = {}
11943 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11945 def CheckPrereq(self):
11946 """Check prerequisites.
11948 This checks that the given group name is not an existing node group
11952 try:
11953 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11954 except errors.OpPrereqError:
11955 pass
11956 else:
11957 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11958 " node group (UUID: %s)" %
11959 (self.op.group_name, existing_uuid),
11960 errors.ECODE_EXISTS)
11962 if self.op.ndparams:
11963 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11965 def BuildHooksEnv(self):
11966 """Build hooks env.
11969 return {
11970 "GROUP_NAME": self.op.group_name,
11971 }
11973 def BuildHooksNodes(self):
11974 """Build hooks nodes.
11977 mn = self.cfg.GetMasterNode()
11978 return ([mn], [mn])
11980 def Exec(self, feedback_fn):
11981 """Add the node group to the cluster.
11984 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11985 uuid=self.group_uuid,
11986 alloc_policy=self.op.alloc_policy,
11987 ndparams=self.op.ndparams)
11989 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11990 del self.remove_locks[locking.LEVEL_NODEGROUP]
11993 class LUGroupAssignNodes(NoHooksLU):
11994 """Logical unit for assigning nodes to groups.
11999 def ExpandNames(self):
12000 # These raise errors.OpPrereqError on their own:
12001 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12002 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12004 # We want to lock all the affected nodes and groups. We have readily
12005 # available the list of nodes, and the *destination* group. To gather the
12006 # list of "source" groups, we need to fetch node information later on.
12007 self.needed_locks = {
12008 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12009 locking.LEVEL_NODE: self.op.nodes,
12010 }
12012 def DeclareLocks(self, level):
12013 if level == locking.LEVEL_NODEGROUP:
12014 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12016 # Try to get all affected nodes' groups without having the group or node
12017 # lock yet. Needs verification later in the code flow.
12018 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12020 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12022 def CheckPrereq(self):
12023 """Check prerequisites.
12026 assert self.needed_locks[locking.LEVEL_NODEGROUP]
12027 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12028 frozenset(self.op.nodes))
12030 expected_locks = (set([self.group_uuid]) |
12031 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12032 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12033 if actual_locks != expected_locks:
12034 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12035 " current groups are '%s', used to be '%s'" %
12036 (utils.CommaJoin(expected_locks),
12037 utils.CommaJoin(actual_locks)))
12039 self.node_data = self.cfg.GetAllNodesInfo()
12040 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12041 instance_data = self.cfg.GetAllInstancesInfo()
12043 if self.group is None:
12044 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12045 (self.op.group_name, self.group_uuid))
12047 (new_splits, previous_splits) = \
12048 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12049 for node in self.op.nodes],
12050 self.node_data, instance_data)
12052 if new_splits:
12053 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12055 if not self.op.force:
12056 raise errors.OpExecError("The following instances get split by this"
12057 " change and --force was not given: %s" %
12058 fmt_new_splits)
12059 else:
12060 self.LogWarning("This operation will split the following instances: %s",
12061 fmt_new_splits)
12063 if previous_splits:
12064 self.LogWarning("In addition, these already-split instances continue"
12065 " to be split across groups: %s",
12066 utils.CommaJoin(utils.NiceSort(previous_splits)))
12068 def Exec(self, feedback_fn):
12069 """Assign nodes to a new group.
12072 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
12074 self.cfg.AssignGroupNodes(mods)
12076 @staticmethod
12077 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12078 """Check for split instances after a node assignment.
12080 This method considers a series of node assignments as an atomic operation,
12081 and returns information about split instances after applying the set of
12084 In particular, it returns information about newly split instances, and
12085 instances that were already split, and remain so after the change.
12087 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12090 @type changes: list of (node_name, new_group_uuid) pairs.
12091 @param changes: list of node assignments to consider.
12092 @param node_data: a dict with data for all nodes
12093 @param instance_data: a dict with all instances to consider
12094 @rtype: a two-tuple
12095 @return: a list of instances that were previously okay and result split as a
12096 consequence of this change, and a list of instances that were previously
12097 split and this change does not fix.
12100 changed_nodes = dict((node, group) for node, group in changes
12101 if node_data[node].group != group)
12103 all_split_instances = set()
12104 previously_split_instances = set()
12106 def InstanceNodes(instance):
12107 return [instance.primary_node] + list(instance.secondary_nodes)
12109 for inst in instance_data.values():
12110 if inst.disk_template not in constants.DTS_INT_MIRROR:
12111 continue
12113 instance_nodes = InstanceNodes(inst)
12115 if len(set(node_data[node].group for node in instance_nodes)) > 1:
12116 previously_split_instances.add(inst.name)
12118 if len(set(changed_nodes.get(node, node_data[node].group)
12119 for node in instance_nodes)) > 1:
12120 all_split_instances.add(inst.name)
12122 return (list(all_split_instances - previously_split_instances),
12123 list(previously_split_instances & all_split_instances))
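# Illustration (hypothetical names): for a DRBD instance with primary node1
# and secondary node2, both currently in group g1, a changes list of
# [("node2", "g2-uuid")] makes the instance span two groups, so it appears
# in the first returned list (newly split); an instance that already spans
# two groups and stays split appears in the second list instead.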
12126 class _GroupQuery(_QueryBase):
12127 FIELDS = query.GROUP_FIELDS
12129 def ExpandNames(self, lu):
12130 lu.needed_locks = {}
12132 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12133 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12135 if not self.names:
12136 self.wanted = [name_to_uuid[name]
12137 for name in utils.NiceSort(name_to_uuid.keys())]
12138 else:
12139 # Accept names to be either names or UUIDs.
12140 missing = []
12141 self.wanted = []
12142 all_uuid = frozenset(self._all_groups.keys())
12144 for name in self.names:
12145 if name in all_uuid:
12146 self.wanted.append(name)
12147 elif name in name_to_uuid:
12148 self.wanted.append(name_to_uuid[name])
12149 else:
12150 missing.append(name)
12152 if missing:
12153 raise errors.OpPrereqError("Some groups do not exist: %s" %
12154 utils.CommaJoin(missing),
12155 errors.ECODE_NOENT)
12157 def DeclareLocks(self, lu, level):
12158 pass
12160 def _GetQueryData(self, lu):
12161 """Computes the list of node groups and their attributes.
12164 do_nodes = query.GQ_NODE in self.requested_data
12165 do_instances = query.GQ_INST in self.requested_data
12167 group_to_nodes = None
12168 group_to_instances = None
12170 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12171 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12172 # latter GetAllInstancesInfo() is not enough, for we have to go through
12173 # instance->node. Hence, we will need to process nodes even if we only need
12174 # instance information.
12175 if do_nodes or do_instances:
12176 all_nodes = lu.cfg.GetAllNodesInfo()
12177 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12178 node_to_group = {}
12180 for node in all_nodes.values():
12181 if node.group in group_to_nodes:
12182 group_to_nodes[node.group].append(node.name)
12183 node_to_group[node.name] = node.group
12185 if do_instances:
12186 all_instances = lu.cfg.GetAllInstancesInfo()
12187 group_to_instances = dict((uuid, []) for uuid in self.wanted)
12189 for instance in all_instances.values():
12190 node = instance.primary_node
12191 if node in node_to_group:
12192 group_to_instances[node_to_group[node]].append(instance.name)
12194 if not do_nodes:
12195 # Do not pass on node information if it was not requested.
12196 group_to_nodes = None
12198 return query.GroupQueryData([self._all_groups[uuid]
12199 for uuid in self.wanted],
12200 group_to_nodes, group_to_instances)
12203 class LUGroupQuery(NoHooksLU):
12204 """Logical unit for querying node groups.
12209 def CheckArguments(self):
12210 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12211 self.op.output_fields, False)
12213 def ExpandNames(self):
12214 self.gq.ExpandNames(self)
12216 def DeclareLocks(self, level):
12217 self.gq.DeclareLocks(self, level)
12219 def Exec(self, feedback_fn):
12220 return self.gq.OldStyleQuery(self)
12223 class LUGroupSetParams(LogicalUnit):
12224 """Modifies the parameters of a node group.
12227 HPATH = "group-modify"
12228 HTYPE = constants.HTYPE_GROUP
12231 def CheckArguments(self):
12232 all_changes = [
12233 self.op.ndparams,
12234 self.op.alloc_policy,
12235 ]
12237 if all_changes.count(None) == len(all_changes):
12238 raise errors.OpPrereqError("Please pass at least one modification",
12239 errors.ECODE_INVAL)
12241 def ExpandNames(self):
12242 # This raises errors.OpPrereqError on its own:
12243 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12245 self.needed_locks = {
12246 locking.LEVEL_NODEGROUP: [self.group_uuid],
12249 def CheckPrereq(self):
12250 """Check prerequisites.
12253 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12255 if self.group is None:
12256 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12257 (self.op.group_name, self.group_uuid))
12259 if self.op.ndparams:
12260 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12261 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12262 self.new_ndparams = new_ndparams
12264 def BuildHooksEnv(self):
12265 """Build hooks env.
12268 return {
12269 "GROUP_NAME": self.op.group_name,
12270 "NEW_ALLOC_POLICY": self.op.alloc_policy,
12271 }
12273 def BuildHooksNodes(self):
12274 """Build hooks nodes.
12277 mn = self.cfg.GetMasterNode()
12278 return ([mn], [mn])
12280 def Exec(self, feedback_fn):
12281 """Modifies the node group.
12285 result = []
12286 if self.op.ndparams:
12287 self.group.ndparams = self.new_ndparams
12288 result.append(("ndparams", str(self.group.ndparams)))
12290 if self.op.alloc_policy:
12291 self.group.alloc_policy = self.op.alloc_policy
12293 self.cfg.Update(self.group, feedback_fn)
12295 return result
12297 class LUGroupRemove(LogicalUnit):
12298 HPATH = "group-remove"
12299 HTYPE = constants.HTYPE_GROUP
12302 def ExpandNames(self):
12303 # This raises errors.OpPrereqError on its own:
12304 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12305 self.needed_locks = {
12306 locking.LEVEL_NODEGROUP: [self.group_uuid],
12309 def CheckPrereq(self):
12310 """Check prerequisites.
12312 This checks that the given group name exists as a node group, that is
12313 empty (i.e., contains no nodes), and that is not the last group of the
12317 # Verify that the group is empty.
12318 group_nodes = [node.name
12319 for node in self.cfg.GetAllNodesInfo().values()
12320 if node.group == self.group_uuid]
12322 if group_nodes:
12323 raise errors.OpPrereqError("Group '%s' not empty, has the following"
12324 " nodes: %s" %
12325 (self.op.group_name,
12326 utils.CommaJoin(utils.NiceSort(group_nodes))),
12327 errors.ECODE_STATE)
12329 # Verify the cluster would not be left group-less.
12330 if len(self.cfg.GetNodeGroupList()) == 1:
12331 raise errors.OpPrereqError("Group '%s' is the only group,"
12332 " cannot be removed" %
12333 self.op.group_name,
12334 errors.ECODE_STATE)
12336 def BuildHooksEnv(self):
12337 """Build hooks env.
12340 return {
12341 "GROUP_NAME": self.op.group_name,
12342 }
12344 def BuildHooksNodes(self):
12345 """Build hooks nodes.
12348 mn = self.cfg.GetMasterNode()
12349 return ([mn], [mn])
12351 def Exec(self, feedback_fn):
12352 """Remove the node group.
12355 try:
12356 self.cfg.RemoveNodeGroup(self.group_uuid)
12357 except errors.ConfigurationError:
12358 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12359 (self.op.group_name, self.group_uuid))
12361 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12364 class LUGroupRename(LogicalUnit):
12365 HPATH = "group-rename"
12366 HTYPE = constants.HTYPE_GROUP
12369 def ExpandNames(self):
12370 # This raises errors.OpPrereqError on its own:
12371 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12373 self.needed_locks = {
12374 locking.LEVEL_NODEGROUP: [self.group_uuid],
12377 def CheckPrereq(self):
12378 """Check prerequisites.
12380 Ensures requested new name is not yet used.
12383 try:
12384 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12385 except errors.OpPrereqError:
12386 pass
12387 else:
12388 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12389 " node group (UUID: %s)" %
12390 (self.op.new_name, new_name_uuid),
12391 errors.ECODE_EXISTS)
12393 def BuildHooksEnv(self):
12394 """Build hooks env.
12397 return {
12398 "OLD_NAME": self.op.group_name,
12399 "NEW_NAME": self.op.new_name,
12400 }
12402 def BuildHooksNodes(self):
12403 """Build hooks nodes.
12406 mn = self.cfg.GetMasterNode()
12408 all_nodes = self.cfg.GetAllNodesInfo()
12409 all_nodes.pop(mn, None)
12411 run_nodes = [mn]
12412 run_nodes.extend(node.name for node in all_nodes.values()
12413 if node.group == self.group_uuid)
12415 return (run_nodes, run_nodes)
12417 def Exec(self, feedback_fn):
12418 """Rename the node group.
12421 group = self.cfg.GetNodeGroup(self.group_uuid)
12423 if group is None:
12424 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12425 (self.op.group_name, self.group_uuid))
12427 group.name = self.op.new_name
12428 self.cfg.Update(group, feedback_fn)
12430 return self.op.new_name
12433 class LUGroupEvacuate(LogicalUnit):
12434 HPATH = "group-evacuate"
12435 HTYPE = constants.HTYPE_GROUP
12438 def ExpandNames(self):
12439 # This raises errors.OpPrereqError on its own:
12440 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12442 if self.op.target_groups:
12443 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12444 self.op.target_groups)
12445 else:
12446 self.req_target_uuids = []
12448 if self.group_uuid in self.req_target_uuids:
12449 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12450 " as a target group (targets are %s)" %
12451 (self.group_uuid,
12452 utils.CommaJoin(self.req_target_uuids)),
12453 errors.ECODE_INVAL)
12455 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12457 self.share_locks = _ShareAll()
12458 self.needed_locks = {
12459 locking.LEVEL_INSTANCE: [],
12460 locking.LEVEL_NODEGROUP: [],
12461 locking.LEVEL_NODE: [],
12464 def DeclareLocks(self, level):
12465 if level == locking.LEVEL_INSTANCE:
12466 assert not self.needed_locks[locking.LEVEL_INSTANCE]
12468 # Lock instances optimistically, needs verification once node and group
12469 # locks have been acquired
12470 self.needed_locks[locking.LEVEL_INSTANCE] = \
12471 self.cfg.GetNodeGroupInstances(self.group_uuid)
12473 elif level == locking.LEVEL_NODEGROUP:
12474 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12476 if self.req_target_uuids:
12477 lock_groups = set([self.group_uuid] + self.req_target_uuids)
12478 else:
12479 # Lock all groups used by instances optimistically; this requires going
12480 # via the node before it's locked, requiring verification later on
12481 lock_groups.update(group_uuid
12482 for instance_name in
12483 self.owned_locks(locking.LEVEL_INSTANCE)
12484 for group_uuid in
12485 self.cfg.GetInstanceNodeGroups(instance_name))
12486 else:
12487 # No target groups, need to lock all of them
12488 lock_groups = locking.ALL_SET
12490 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12492 elif level == locking.LEVEL_NODE:
12493 # This will only lock the nodes in the group to be evacuated which
12494 # contain actual instances
12495 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12496 self._LockInstancesNodes()
12498 # Lock all nodes in group to be evacuated and target groups
12499 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12500 assert self.group_uuid in owned_groups
12501 member_nodes = [node_name
12502 for group in owned_groups
12503 for node_name in self.cfg.GetNodeGroup(group).members]
12504 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12506 def CheckPrereq(self):
12507 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12508 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12509 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12511 assert owned_groups.issuperset(self.req_target_uuids)
12512 assert self.group_uuid in owned_groups
12514 # Check if locked instances are still correct
12515 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12517 # Get instance information
12518 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12520 # Check if node groups for locked instances are still correct
12521 for instance_name in owned_instances:
12522 inst = self.instances[instance_name]
12523 assert owned_nodes.issuperset(inst.all_nodes), \
12524 "Instance %s's nodes changed while we kept the lock" % instance_name
12526 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12527 owned_groups)
12529 assert self.group_uuid in inst_groups, \
12530 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12532 if self.req_target_uuids:
12533 # User requested specific target groups
12534 self.target_uuids = self.req_target_uuids
12535 else:
12536 # All groups except the one to be evacuated are potential targets
12537 self.target_uuids = [group_uuid for group_uuid in owned_groups
12538 if group_uuid != self.group_uuid]
12540 if not self.target_uuids:
12541 raise errors.OpPrereqError("There are no possible target groups",
12542 errors.ECODE_INVAL)
12544 def BuildHooksEnv(self):
12545 """Build hooks env.
12548 return {
12549 "GROUP_NAME": self.op.group_name,
12550 "TARGET_GROUPS": " ".join(self.target_uuids),
12551 }
12553 def BuildHooksNodes(self):
12554 """Build hooks nodes.
12557 mn = self.cfg.GetMasterNode()
12559 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12561 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12563 return (run_nodes, run_nodes)
12565 def Exec(self, feedback_fn):
12566 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12568 assert self.group_uuid not in self.target_uuids
12570 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12571 instances=instances, target_groups=self.target_uuids)
12573 ial.Run(self.op.iallocator)
12575 if not ial.success:
12576 raise errors.OpPrereqError("Can't compute group evacuation using"
12577 " iallocator '%s': %s" %
12578 (self.op.iallocator, ial.info),
12579 errors.ECODE_NORES)
12581 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12583 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12584 len(jobs), self.op.group_name)
12586 return ResultWithJobs(jobs)
12589 class TagsLU(NoHooksLU): # pylint: disable=W0223
12590 """Generic tags LU.
12592 This is an abstract class which is the parent of all the other tags LUs.
12595 def ExpandNames(self):
12596 self.group_uuid = None
12597 self.needed_locks = {}
12598 if self.op.kind == constants.TAG_NODE:
12599 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12600 self.needed_locks[locking.LEVEL_NODE] = self.op.name
12601 elif self.op.kind == constants.TAG_INSTANCE:
12602 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12603 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12604 elif self.op.kind == constants.TAG_NODEGROUP:
12605 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12607 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12608 # not possible to acquire the BGL based on opcode parameters)
12610 def CheckPrereq(self):
12611 """Check prerequisites.
12614 if self.op.kind == constants.TAG_CLUSTER:
12615 self.target = self.cfg.GetClusterInfo()
12616 elif self.op.kind == constants.TAG_NODE:
12617 self.target = self.cfg.GetNodeInfo(self.op.name)
12618 elif self.op.kind == constants.TAG_INSTANCE:
12619 self.target = self.cfg.GetInstanceInfo(self.op.name)
12620 elif self.op.kind == constants.TAG_NODEGROUP:
12621 self.target = self.cfg.GetNodeGroup(self.group_uuid)
12622 else:
12623 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12624 str(self.op.kind), errors.ECODE_INVAL)
12627 class LUTagsGet(TagsLU):
12628 """Returns the tags of a given object.
12633 def ExpandNames(self):
12634 TagsLU.ExpandNames(self)
12636 # Share locks as this is only a read operation
12637 self.share_locks = _ShareAll()
12639 def Exec(self, feedback_fn):
12640 """Returns the tag list.
12643 return list(self.target.GetTags())
12646 class LUTagsSearch(NoHooksLU):
12647 """Searches the tags for a given pattern.
12652 def ExpandNames(self):
12653 self.needed_locks = {}
12655 def CheckPrereq(self):
12656 """Check prerequisites.
12658 This checks the pattern passed for validity by compiling it.
12661 try:
12662 self.re = re.compile(self.op.pattern)
12663 except re.error, err:
12664 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12665 (self.op.pattern, err), errors.ECODE_INVAL)
12667 def Exec(self, feedback_fn):
12668 """Returns the tag list.
12671 cfg = self.cfg
12672 tgts = [("/cluster", cfg.GetClusterInfo())]
12673 ilist = cfg.GetAllInstancesInfo().values()
12674 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12675 nlist = cfg.GetAllNodesInfo().values()
12676 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12677 tgts.extend(("/nodegroup/%s" % n.name, n)
12678 for n in cfg.GetAllNodeGroupsInfo().values())
12679 results = []
12680 for path, target in tgts:
12681 for tag in target.GetTags():
12682 if self.re.search(tag):
12683 results.append((path, tag))
12685 return results
12687 class LUTagsSet(TagsLU):
12688 """Sets a tag on a given object.
12693 def CheckPrereq(self):
12694 """Check prerequisites.
12696 This checks the type and length of the tag name and value.
12699 TagsLU.CheckPrereq(self)
12700 for tag in self.op.tags:
12701 objects.TaggableObject.ValidateTag(tag)
12703 def Exec(self, feedback_fn):
12707 try:
12708 for tag in self.op.tags:
12709 self.target.AddTag(tag)
12710 except errors.TagError, err:
12711 raise errors.OpExecError("Error while setting tag: %s" % str(err))
12712 self.cfg.Update(self.target, feedback_fn)
12715 class LUTagsDel(TagsLU):
12716 """Delete a list of tags from a given object.
12721 def CheckPrereq(self):
12722 """Check prerequisites.
12724 This checks that we have the given tag.
12727 TagsLU.CheckPrereq(self)
12728 for tag in self.op.tags:
12729 objects.TaggableObject.ValidateTag(tag)
12730 del_tags = frozenset(self.op.tags)
12731 cur_tags = self.target.GetTags()
12733 diff_tags = del_tags - cur_tags
12734 if diff_tags:
12735 diff_names = ("'%s'" % i for i in sorted(diff_tags))
12736 raise errors.OpPrereqError("Tag(s) %s not found" %
12737 (utils.CommaJoin(diff_names), ),
12738 errors.ECODE_NOENT)
12740 def Exec(self, feedback_fn):
12741 """Remove the tag from the object.
12744 for tag in self.op.tags:
12745 self.target.RemoveTag(tag)
12746 self.cfg.Update(self.target, feedback_fn)
12749 class LUTestDelay(NoHooksLU):
12750 """Sleep for a specified amount of time.
12752 This LU sleeps on the master and/or nodes for a specified amount of
12758 def ExpandNames(self):
12759 """Expand names and set required locks.
12761 This expands the node list, if any.
12764 self.needed_locks = {}
12765 if self.op.on_nodes:
12766 # _GetWantedNodes can be used here, but is not always appropriate to use
12767 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12768 # more information.
12769 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12770 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12772 def _TestDelay(self):
12773 """Do the actual sleep.
12776 if self.op.on_master:
12777 if not utils.TestDelay(self.op.duration):
12778 raise errors.OpExecError("Error during master delay test")
12779 if self.op.on_nodes:
12780 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12781 for node, node_result in result.items():
12782 node_result.Raise("Failure during rpc call to node %s" % node)
12784 def Exec(self, feedback_fn):
12785 """Execute the test delay opcode, with the wanted repetitions.
12788 if self.op.repeat == 0:
12789 self._TestDelay()
12790 else:
12791 top_value = self.op.repeat - 1
12792 for i in range(self.op.repeat):
12793 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12794 self._TestDelay()
12797 class LUTestJqueue(NoHooksLU):
12798 """Utility LU to test some aspects of the job queue.
12803 # Must be lower than default timeout for WaitForJobChange to see whether it
12804 # notices changed jobs
12805 _CLIENT_CONNECT_TIMEOUT = 20.0
12806 _CLIENT_CONFIRM_TIMEOUT = 60.0
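# These timeouts bound the two phases of the notification handshake used by
# _NotifyUsingSocket below: first waiting for the test client to connect to
# the temporary socket, then waiting for it to confirm by closing its end.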
12808 @classmethod
12809 def _NotifyUsingSocket(cls, cb, errcls):
12810 """Opens a Unix socket and waits for another program to connect.
12813 @param cb: Callback to send socket name to client
12814 @type errcls: class
12815 @param errcls: Exception class to use for errors
12818 # Using a temporary directory as there's no easy way to create temporary
12819 # sockets without writing a custom loop around tempfile.mktemp and
12821 tmpdir = tempfile.mkdtemp()
12822 try:
12823 tmpsock = utils.PathJoin(tmpdir, "sock")
12825 logging.debug("Creating temporary socket at %s", tmpsock)
12826 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12827 try:
12828 sock.bind(tmpsock)
12829 sock.listen(1)
12831 # Send details to client
12832 cb(tmpsock)
12834 # Wait for client to connect before continuing
12835 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12836 try:
12837 (conn, _) = sock.accept()
12838 except socket.error, err:
12839 raise errcls("Client didn't connect in time (%s)" % err)
12840 finally:
12841 sock.close()
12842 finally:
12843 # Remove as soon as client is connected
12844 shutil.rmtree(tmpdir)
12846 # Wait for client to close
12847 try:
12848 try:
12849 # pylint: disable=E1101
12850 # Instance of '_socketobject' has no ... member
12851 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12852 conn.recv(1)
12853 except socket.error, err:
12854 raise errcls("Client failed to confirm notification (%s)" % err)
12855 finally:
12856 conn.close()
12858 def _SendNotification(self, test, arg, sockname):
12859 """Sends a notification to the client.
12862 @param test: Test name
12863 @param arg: Test argument (depends on test)
12864 @type sockname: string
12865 @param sockname: Socket path
12868 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
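# The notification is emitted as a regular LU log entry with the special
# ELOG_JQUEUE_TEST type; the test client is expected to observe it through
# the job's log messages (cf. the WaitForJobChange note above).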
12870 def _Notify(self, prereq, test, arg):
12871 """Notifies the client of a test.
12874 @param prereq: Whether this is a prereq-phase test
12876 @param test: Test name
12877 @param arg: Test argument (depends on test)
12880 if prereq:
12881 errcls = errors.OpPrereqError
12882 else:
12883 errcls = errors.OpExecError
12885 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12886 test, arg),
12887 errcls)
12889 def CheckArguments(self):
12890 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12891 self.expandnames_calls = 0
12893 def ExpandNames(self):
12894 checkargs_calls = getattr(self, "checkargs_calls", 0)
12895 if checkargs_calls < 1:
12896 raise errors.ProgrammerError("CheckArguments was not called")
12898 self.expandnames_calls += 1
12900 if self.op.notify_waitlock:
12901 self._Notify(True, constants.JQT_EXPANDNAMES, None)
12903 self.LogInfo("Expanding names")
12905 # Get lock on master node (just to get a lock, not for a particular reason)
12906 self.needed_locks = {
12907 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12910 def Exec(self, feedback_fn):
12911 if self.expandnames_calls < 1:
12912 raise errors.ProgrammerError("ExpandNames was not called")
12914 if self.op.notify_exec:
12915 self._Notify(False, constants.JQT_EXEC, None)
12917 self.LogInfo("Executing")
12919 if self.op.log_messages:
12920 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12921 for idx, msg in enumerate(self.op.log_messages):
12922 self.LogInfo("Sending log message %s", idx + 1)
12923 feedback_fn(constants.JQT_MSGPREFIX + msg)
12924 # Report how many test messages have been sent
12925 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12927 if self.op.fail:
12928 raise errors.OpExecError("Opcode failure was requested")
12930 return True
12933 class IAllocator(object):
12934 """IAllocator framework.
12936 An IAllocator instance has three sets of attributes:
12937 - cfg that is needed to query the cluster
12938 - input data (all members of the _KEYS class attribute are required)
12939 - four buffer attributes (in|out_data|text), that represent the
12940 input (to the external script) in text and data structure format,
12941 and the output from it, again in two formats
12942 - the result variables from the script (success, info, nodes) for
12943 easy usage
12946 # pylint: disable=R0902
12947 # lots of instance attributes
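# Typical usage, as seen in LUInstanceChangeGroup.Exec and
# LUGroupEvacuate.Exec above: build a request for one of the supported
# modes, run the chosen allocator script and inspect the result, e.g.:
#   ial = IAllocator(cfg, rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
#                    instances=instances, target_groups=target_groups)
#   ial.Run(op.iallocator)
#   if not ial.success:
#     raise errors.OpPrereqError(...)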
12949 def __init__(self, cfg, rpc, mode, **kwargs):
12950 self.cfg = cfg
12951 self.rpc = rpc
12952 # init buffer variables
12953 self.in_text = self.out_text = self.in_data = self.out_data = None
12954 # init all input fields so that pylint is happy
12955 self.mode = mode
12956 self.memory = self.disks = self.disk_template = None
12957 self.os = self.tags = self.nics = self.vcpus = None
12958 self.hypervisor = None
12959 self.relocate_from = None
12961 self.instances = None
12962 self.evac_mode = None
12963 self.target_groups = []
12965 self.required_nodes = None
12966 # init result fields
12967 self.success = self.info = self.result = None
12970 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12972 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12973 " IAllocator" % self.mode)
12975 keyset = [n for (n, _) in keydata]
12978 if key not in keyset:
12979 raise errors.ProgrammerError("Invalid input parameter '%s' to"
12980 " IAllocator" % key)
12981 setattr(self, key, kwargs[key])
12984 if key not in kwargs:
12985 raise errors.ProgrammerError("Missing input parameter '%s' to"
12986 " IAllocator" % key)
12987 self._BuildInputData(compat.partial(fn, self), keydata)
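
  # Illustrative sketch only: how a caller (typically an LU) drives this class
  # for an allocation request. The literal values are invented; the keyword
  # arguments are the ones required by _MODE_DATA for IALLOCATOR_MODE_ALLOC,
  # and "hail" is just an example allocator script name.
  #
  #   ial = IAllocator(self.cfg, self.rpc,
  #                    mode=constants.IALLOCATOR_MODE_ALLOC,
  #                    name="instance1.example.com",
  #                    memory=1024, disks=[{"size": 10240, "mode": "rw"}],
  #                    disk_template="drbd", os="debian-image",
  #                    tags=[], nics=[{}], vcpus=1, hypervisor="xen-pvm")
  #   ial.Run("hail")  # runs the external script and validates its output
  #   if not ial.success:
  #     raise errors.OpPrereqError(ial.info, errors.ECODE_NORES)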

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data
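
  # For reference (illustrative, abridged): after _ComputeClusterData the
  # in_data dictionary has roughly this shape; all values below are invented
  # and "<group-uuid>" is a placeholder.
  #
  #   {
  #     "version": 2,
  #     "cluster_name": "cluster.example.com",
  #     "cluster_tags": [],
  #     "enabled_hypervisors": ["xen-pvm"],
  #     "nodegroups": {"<group-uuid>": {"name": "default",
  #                                     "alloc_policy": "preferred"}},
  #     "nodes": {...},      # per-node dicts, see _Compute*NodeData below
  #     "instances": {...},  # per-instance dicts, see _ComputeInstanceData
  #     "request": {...},    # added later by _BuildInputData
  #   }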

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of node name: node data dict, with the static
        (config-based) values only

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global dynamic node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)
            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results
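
  # Illustrative example of one entry in the resulting "nodes" dict for an
  # online, vm_capable node (all numbers invented, "<group-uuid>" is a
  # placeholder): the static keys come from _ComputeBasicNodeData, the dynamic
  # ones are added above.
  #
  #   "node1.example.com": {
  #     "tags": [], "primary_ip": "192.0.2.1", "secondary_ip": "198.51.100.1",
  #     "offline": False, "drained": False, "master_candidate": True,
  #     "group": "<group-uuid>", "master_capable": True, "vm_capable": True,
  #     "total_memory": 32768, "reserved_memory": 1024, "free_memory": 20480,
  #     "total_disk": 512000, "free_disk": 256000, "total_cpus": 8,
  #     "i_pri_memory": 8192, "i_pri_up_memory": 6144,
  #   }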

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data
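
  # Illustrative example of one entry in the "instances" dict (values made up):
  #
  #   "inst1.example.com": {
  #     "tags": [], "admin_up": True, "vcpus": 2, "memory": 1024,
  #     "nodes": ["node1.example.com", "node2.example.com"],
  #     "nics": [{"mode": "bridged", "link": "xen-br0", "bridge": "xen-br0"}],
  #     "disks": [{"size": 10240, "mode": "rw"}],
  #     "disk_template": "drbd", "hypervisor": "xen-pvm",
  #     "disk_space_total": 10368,
  #   }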

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for group-change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
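
  # Illustrative sketch of the final in_text handed to the iallocator script
  # for a relocation request (abridged, values invented): it is the cluster
  # data from _ComputeClusterData plus the mode-specific "request" dict with
  # its "type" key added above.
  #
  #   {
  #     "version": 2, "cluster_name": "cluster.example.com",
  #     "nodegroups": {...}, "nodes": {...}, "instances": {...},
  #     "request": {
  #       "type": "relocate",
  #       "name": "inst1.example.com",
  #       "disk_space_total": 10368,
  #       "required_nodes": 1,
  #       "relocate_from": ["node2.example.com"],
  #     },
  #   }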

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                  ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                  ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
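
  # Illustrative example of a value accepted by _NEVAC_RESULT (all names and
  # values invented): a list of moved instances, a list of failed instances,
  # and a list of job sets containing only the whitelisted opcodes above.
  #
  #   [
  #     [["inst1.example.com", "group2", ["node3.example.com"]]],
  #     [["inst2.example.com", "not enough memory"]],
  #     [[{"OP_ID": "OP_INSTANCE_MIGRATE",
  #        "instance_name": "inst1.example.com"}]],
  #   ]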

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance, [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
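
  # Illustrative example of a minimal result accepted for an allocation
  # request (values invented): the script must return a JSON object with
  # "success", "info" and "result" keys, e.g.
  #
  #   {"success": true, "info": "allocation successful",
  #    "result": ["node1.example.com", "node2.example.com"]}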

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @param groups: Group information
    @param nodes: Node names

    """
    result = set()
    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        continue
      try:
        group = groups[group_uuid]
      except KeyError:
        # Can't find group, let's use UUID
        group_name = group_uuid
      else:
        group_name = group["name"]
      result.add(group_name)

    return sorted(result)
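
  # Illustrative usage of _NodesToGroups (data made up): unknown nodes are
  # skipped and unknown group UUIDs fall back to the UUID itself.
  #
  #   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
  #   groups = {"uuid-a": {"name": "default"}}
  #   IAllocator._NodesToGroups(node2group, groups, ["node1", "node2", "node9"])
  #   # => ["default", "uuid-b"]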

class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result

#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
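
# Illustrative usage (no new behaviour): resolving the query implementation
# for an opcode-driven query; an unknown resource name raises OpPrereqError.
#
#   impl = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
#   impl = _GetQueryImplementation("no-such-resource")  # raises OpPrereqError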