4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
63 import ganeti.masterd.instance # pylint: disable=W0611
67 """Data container for LU results with jobs.
69 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
71 contained in the C{jobs} attribute and include the job IDs in the opcode
75 def __init__(self, jobs, **kwargs):
76 """Initializes this class.
78 Additional return values can be specified as keyword arguments.
80 @type jobs: list of lists of L{opcodes.OpCode}
81 @param jobs: A list of lists of opcode objects
88 class LogicalUnit(object):
89 """Logical Unit base class.
91 Subclasses must follow these rules:
92 - implement ExpandNames
93 - implement CheckPrereq (except when tasklets are used)
94 - implement Exec (except when tasklets are used)
95 - implement BuildHooksEnv
96 - implement BuildHooksNodes
97 - redefine HPATH and HTYPE
98 - optionally redefine their run requirements:
99 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
101 Note that all commands require root permissions.
103 @ivar dry_run_result: the value (if any) that will be returned to the caller
104 in dry-run mode (signalled by opcode dry_run parameter)
111 def __init__(self, processor, op, context, rpc):
112 """Constructor for LogicalUnit.
114 This needs to be overridden in derived classes in order to check op
118 self.proc = processor
120 self.cfg = context.cfg
121 self.glm = context.glm
123 self.owned_locks = context.glm.list_owned
124 self.context = context
126 # Dicts used to declare locking needs to mcpu
127 self.needed_locks = None
128 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
130 self.remove_locks = {}
131 # Used to force good behavior when calling helper functions
132 self.recalculate_locks = {}
134 self.Log = processor.Log # pylint: disable=C0103
135 self.LogWarning = processor.LogWarning # pylint: disable=C0103
136 self.LogInfo = processor.LogInfo # pylint: disable=C0103
137 self.LogStep = processor.LogStep # pylint: disable=C0103
138 # support for dry-run
139 self.dry_run_result = None
140 # support for generic debug attribute
141 if (not hasattr(self.op, "debug_level") or
142 not isinstance(self.op.debug_level, int)):
143 self.op.debug_level = 0
148 # Validate opcode parameters and set defaults
149 self.op.Validate(True)
151 self.CheckArguments()
153 def CheckArguments(self):
154 """Check syntactic validity for the opcode arguments.
156 This method is for doing a simple syntactic check and ensuring the
157 validity of opcode parameters, without any cluster-related
158 checks. While the same can be accomplished in ExpandNames and/or
159 CheckPrereq, doing these separately is better because:
161 - ExpandNames is left as purely a lock-related function
162 - CheckPrereq is run after we have acquired locks (and possible
165 The function is allowed to change the self.op attribute so that
166 later methods no longer need to worry about missing parameters.
171 def ExpandNames(self):
172 """Expand names for this LU.
174 This method is called before starting to execute the opcode, and it should
175 update all the parameters of the opcode to their canonical form (e.g. a
176 short node name must be fully expanded after this method has successfully
177 completed). This way locking, hooks, logging, etc. can work correctly.
179 LUs which implement this method must also populate the self.needed_locks
180 member, as a dict with lock levels as keys, and a list of needed lock names
183 - use an empty dict if you don't need any lock
184 - if you don't need any lock at a particular level omit that level
185 - don't put anything for the BGL level
186 - if you want all locks at a level use locking.ALL_SET as a value
188 If you need to share locks (rather than acquire them exclusively) at one
189 level you can modify self.share_locks, setting a true value (usually 1) for
190 that level. By default locks are not shared.
192 This function can also define a list of tasklets, which then will be
193 executed in order instead of the usual LU-level CheckPrereq and Exec
194 functions, if those are not defined by the LU.
198 # Acquire all nodes and one instance
199 self.needed_locks = {
200 locking.LEVEL_NODE: locking.ALL_SET,
201 locking.LEVEL_INSTANCE: ['instance1.example.com'],
203 # Acquire just two nodes
204 self.needed_locks = {
205 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
208 self.needed_locks = {} # No, you can't leave it to the default value None
211 # The implementation of this method is mandatory only if the new LU is
212 # concurrent, so that old LUs don't need to be changed all at the same
215 self.needed_locks = {} # Exclusive LUs don't need locks.
217 raise NotImplementedError
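# Illustrative sketch only (this LU does not exist in this module): an
# ExpandNames implementation that acquires all node locks in shared mode,
# combining self.needed_locks with self.share_locks as described above.
#
#   def ExpandNames(self):
#     self.needed_locks = {
#       locking.LEVEL_NODE: locking.ALL_SET,
#       }
#     self.share_locks[locking.LEVEL_NODE] = 1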
219 def DeclareLocks(self, level):
220 """Declare LU locking needs for a level
222 While most LUs can just declare their locking needs at ExpandNames time,
223 sometimes there's the need to calculate some locks after having acquired
224 the ones before. This function is called just before acquiring locks at a
225 particular level, but after acquiring the ones at lower levels, and permits
226 such calculations. It can be used to modify self.needed_locks, and by
227 default it does nothing.
229 This function is only called if you have something already set in
230 self.needed_locks for the level.
232 @param level: Locking level which is going to be locked
233 @type level: member of ganeti.locking.LEVELS
237 def CheckPrereq(self):
238 """Check prerequisites for this LU.
240 This method should check that the prerequisites for the execution
241 of this LU are fulfilled. It can do internode communication, but
242 it should be idempotent - no cluster or system changes are
245 The method should raise errors.OpPrereqError in case something is
246 not fulfilled. Its return value is ignored.
248 This method should also update all the parameters of the opcode to
249 their canonical form if it hasn't been done by ExpandNames before.
252 if self.tasklets is not None:
253 for (idx, tl) in enumerate(self.tasklets):
254 logging.debug("Checking prerequisites for tasklet %s/%s",
255 idx + 1, len(self.tasklets))
260 def Exec(self, feedback_fn):
263 This method should implement the actual work. It should raise
264 errors.OpExecError for failures that are somewhat dealt with in
268 if self.tasklets is not None:
269 for (idx, tl) in enumerate(self.tasklets):
270 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
273 raise NotImplementedError
275 def BuildHooksEnv(self):
276 """Build hooks environment for this LU.
279 @return: Dictionary containing the environment that will be used for
280 running the hooks for this LU. The keys of the dict must not be prefixed
281 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
282 will extend the environment with additional variables. If no environment
283 should be defined, an empty dictionary should be returned (not C{None}).
284 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
288 raise NotImplementedError
290 def BuildHooksNodes(self):
291 """Build list of nodes to run LU's hooks.
293 @rtype: tuple; (list, list)
294 @return: Tuple containing a list of node names on which the hook
295 should run before the execution and a list of node names on which the
296 hook should run after the execution. If there are no nodes, an
297 empty list must be returned (and not None).
298 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
302 raise NotImplementedError
304 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
305 """Notify the LU about the results of its hooks.
307 This method is called every time a hooks phase is executed, and notifies
308 the Logical Unit about the hooks' result. The LU can then use it to alter
309 its result based on the hooks. By default the method does nothing and the
310 previous result is passed back unchanged but any LU can define it if it
311 wants to use the local cluster hook-scripts somehow.
313 @param phase: one of L{constants.HOOKS_PHASE_POST} or
314 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
315 @param hook_results: the results of the multi-node hooks rpc call
316 @param feedback_fn: function used to send feedback back to the caller
317 @param lu_result: the previous Exec result this LU had, or None
319 @return: the new Exec result, based on the previous result
323 # API must be kept, thus we ignore the unused-argument and
324 # could-be-a-function warnings
325 # pylint: disable=W0613,R0201
328 def _ExpandAndLockInstance(self):
329 """Helper function to expand and lock an instance.
331 Many LUs that work on an instance take its name in self.op.instance_name
332 and need to expand it and then declare the expanded name for locking. This
333 function does it, and then updates self.op.instance_name to the expanded
334 name. It also initializes needed_locks as a dict, if this hasn't been done
338 if self.needed_locks is None:
339 self.needed_locks = {}
341 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
342 "_ExpandAndLockInstance called with instance-level locks set"
343 self.op.instance_name = _ExpandInstanceName(self.cfg,
344 self.op.instance_name)
345 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
347 def _LockInstancesNodes(self, primary_only=False):
348 """Helper function to declare instances' nodes for locking.
350 This function should be called after locking one or more instances to lock
351 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
352 with all primary or secondary nodes for instances already locked and
353 present in self.needed_locks[locking.LEVEL_INSTANCE].
355 It should be called from DeclareLocks, and for safety only works if
356 self.recalculate_locks[locking.LEVEL_NODE] is set.
358 In the future it may grow parameters to lock only some instances' nodes, or
359 to lock only primary or secondary nodes, if needed.
361 It should be called in DeclareLocks in a way similar to::
363 if level == locking.LEVEL_NODE:
364 self._LockInstancesNodes()
366 @type primary_only: boolean
367 @param primary_only: only lock primary nodes of locked instances
370 assert locking.LEVEL_NODE in self.recalculate_locks, \
371 "_LockInstancesNodes helper function called with no nodes to recalculate"
373 # TODO: check if we've really been called with the instance locks held
375 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
376 # future we might want to have different behaviors depending on the value
377 # of self.recalculate_locks[locking.LEVEL_NODE]
379 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
380 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
381 wanted_nodes.append(instance.primary_node)
383 wanted_nodes.extend(instance.secondary_nodes)
385 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
386 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
387 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
388 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
390 del self.recalculate_locks[locking.LEVEL_NODE]
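# Usage sketch (an assumed LU shape, not a unit defined here): a typical
# caller locks an instance in ExpandNames, requests node-lock recalculation,
# and then delegates to _LockInstancesNodes from DeclareLocks.
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()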
393 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
394 """Simple LU which runs no hooks.
396 This LU is intended as a parent for other LogicalUnits which will
397 run no hooks, in order to reduce duplicate code.
403 def BuildHooksEnv(self):
404 """Empty BuildHooksEnv for NoHooksLU.
406 This just raises an error.
409 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
411 def BuildHooksNodes(self):
412 """Empty BuildHooksNodes for NoHooksLU.
415 raise AssertionError("BuildHooksNodes called for NoHooksLU")
419 """Tasklet base class.
421 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
422 they can mix legacy code with tasklets. Locking needs to be done in the LU,
423 tasklets know nothing about locks.
425 Subclasses must follow these rules:
426 - Implement CheckPrereq
430 def __init__(self, lu):
437 def CheckPrereq(self):
438 """Check prerequisites for this tasklet.
440 This method should check whether the prerequisites for the execution of
441 this tasklet are fulfilled. It can do internode communication, but it
442 should be idempotent - no cluster or system changes are allowed.
444 The method should raise errors.OpPrereqError in case something is not
445 fulfilled. Its return value is ignored.
447 This method should also update all parameters to their canonical form if it
448 hasn't been done before.
453 def Exec(self, feedback_fn):
454 """Execute the tasklet.
456 This method should implement the actual work. It should raise
457 errors.OpExecError for failures that are somewhat dealt with in code, or
461 raise NotImplementedError
465 """Base for query utility classes.
468 #: Attribute holding field definitions
471 def __init__(self, filter_, fields, use_locking):
472 """Initializes this class.
475 self.use_locking = use_locking
477 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
479 self.requested_data = self.query.RequestedData()
480 self.names = self.query.RequestedNames()
482 # Sort only if no names were requested
483 self.sort_by_name = not self.names
485 self.do_locking = None
488 def _GetNames(self, lu, all_names, lock_level):
489 """Helper function to determine names asked for in the query.
493 names = lu.owned_locks(lock_level)
497 if self.wanted == locking.ALL_SET:
498 assert not self.names
499 # caller didn't specify names, so ordering is not important
500 return utils.NiceSort(names)
502 # caller specified names and we must keep the same order
504 assert not self.do_locking or lu.glm.is_owned(lock_level)
506 missing = set(self.wanted).difference(names)
508 raise errors.OpExecError("Some items were removed before retrieving"
509 " their data: %s" % missing)
511 # Return expanded names
514 def ExpandNames(self, lu):
515 """Expand names for this query.
517 See L{LogicalUnit.ExpandNames}.
520 raise NotImplementedError()
522 def DeclareLocks(self, lu, level):
523 """Declare locks for this query.
525 See L{LogicalUnit.DeclareLocks}.
528 raise NotImplementedError()
530 def _GetQueryData(self, lu):
531 """Collects all data for this query.
533 @return: Query data object
536 raise NotImplementedError()
538 def NewStyleQuery(self, lu):
539 """Collect data and execute query.
542 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
543 sort_by_name=self.sort_by_name)
545 def OldStyleQuery(self, lu):
546 """Collect data and execute query.
549 return self.query.OldStyleQuery(self._GetQueryData(lu),
550 sort_by_name=self.sort_by_name)
554 """Returns a dict declaring all lock levels shared.
557 return dict.fromkeys(locking.LEVELS, 1)
560 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
561 """Checks if the owned node groups are still correct for an instance.
563 @type cfg: L{config.ConfigWriter}
564 @param cfg: The cluster configuration
565 @type instance_name: string
566 @param instance_name: Instance name
567 @type owned_groups: set or frozenset
568 @param owned_groups: List of currently owned node groups
571 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
573 if not owned_groups.issuperset(inst_groups):
574 raise errors.OpPrereqError("Instance %s's node groups changed since"
575 " locks were acquired, current groups"
576 " are '%s', owning groups '%s'; retry the"
579 utils.CommaJoin(inst_groups),
580 utils.CommaJoin(owned_groups)),
586 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
587 """Checks if the instances in a node group are still correct.
589 @type cfg: L{config.ConfigWriter}
590 @param cfg: The cluster configuration
591 @type group_uuid: string
592 @param group_uuid: Node group UUID
593 @type owned_instances: set or frozenset
594 @param owned_instances: List of currently owned instances
597 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
598 if owned_instances != wanted_instances:
599 raise errors.OpPrereqError("Instances in node group '%s' changed since"
600 " locks were acquired, wanted '%s', have '%s';"
601 " retry the operation" %
603 utils.CommaJoin(wanted_instances),
604 utils.CommaJoin(owned_instances)),
607 return wanted_instances
610 def _SupportsOob(cfg, node):
611 """Tells if node supports OOB.
613 @type cfg: L{config.ConfigWriter}
614 @param cfg: The cluster configuration
615 @type node: L{objects.Node}
616 @param node: The node
617 @return: The OOB script if supported or an empty string otherwise
620 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
623 def _GetWantedNodes(lu, nodes):
624 """Returns list of checked and expanded node names.
626 @type lu: L{LogicalUnit}
627 @param lu: the logical unit on whose behalf we execute
629 @param nodes: list of node names or None for all nodes
631 @return: the list of nodes, sorted
632 @raise errors.ProgrammerError: if the nodes parameter is wrong type
636 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
638 return utils.NiceSort(lu.cfg.GetNodeList())
641 def _GetWantedInstances(lu, instances):
642 """Returns list of checked and expanded instance names.
644 @type lu: L{LogicalUnit}
645 @param lu: the logical unit on whose behalf we execute
646 @type instances: list
647 @param instances: list of instance names or None for all instances
649 @return: the list of instances, sorted
650 @raise errors.OpPrereqError: if the instances parameter is wrong type
651 @raise errors.OpPrereqError: if any of the passed instances is not found
655 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
657 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
661 def _GetUpdatedParams(old_params, update_dict,
662 use_default=True, use_none=False):
663 """Return the new version of a parameter dictionary.
665 @type old_params: dict
666 @param old_params: old parameters
667 @type update_dict: dict
668 @param update_dict: dict containing new parameter values, or
669 constants.VALUE_DEFAULT to reset the parameter to its default
671 @type use_default: boolean
672 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
673 values as 'to be deleted' values
674 @type use_none: boolean
675 @param use_none: whether to recognise C{None} values as 'to be
678 @return: the new parameter dictionary
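A small illustrative example (made-up values, default C{use_default=True})::

  # returns {'a': 1, 'c': 3}: 'b' is reset to its default (i.e. removed
  # from the result), 'c' is added
  _GetUpdatedParams({'a': 1, 'b': 2},
                    {'b': constants.VALUE_DEFAULT, 'c': 3})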
681 params_copy = copy.deepcopy(old_params)
682 for key, val in update_dict.iteritems():
683 if ((use_default and val == constants.VALUE_DEFAULT) or
684 (use_none and val is None)):
690 params_copy[key] = val
694 def _ReleaseLocks(lu, level, names=None, keep=None):
695 """Releases locks owned by an LU.
697 @type lu: L{LogicalUnit}
@param lu: the logical unit on whose behalf the locks are released
698 @param level: Lock level
699 @type names: list or None
700 @param names: Names of locks to release
701 @type keep: list or None
702 @param keep: Names of locks to retain
705 assert not (keep is not None and names is not None), \
706 "Only one of the 'names' and the 'keep' parameters can be given"
708 if names is not None:
709 should_release = names.__contains__
711 should_release = lambda name: name not in keep
713 should_release = None
719 # Determine which locks to release
720 for name in lu.owned_locks(level):
721 if should_release(name):
726 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
728 # Release just some locks
729 lu.glm.release(level, names=release)
731 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
734 lu.glm.release(level)
736 assert not lu.glm.is_owned(level), "No locks should be owned"
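# Usage sketch (hedged; the attribute names are placeholders): release the
# node locks an LU no longer needs once the target node is known, or drop a
# whole level after use.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
#   _ReleaseLocks(self, locking.LEVEL_INSTANCE)  # releases all instance locks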
739 def _MapInstanceDisksToNodes(instances):
740 """Creates a map from (node, volume) to instance name.
742 @type instances: list of L{objects.Instance}
743 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
746 return dict(((node, vol), inst.name)
747 for inst in instances
748 for (node, vols) in inst.MapLVsByNode().items()
752 def _RunPostHook(lu, node_name):
753 """Runs the post-hook for an opcode on a single node.
756 hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
758 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
760 # pylint: disable=W0702
761 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
764 def _CheckOutputFields(static, dynamic, selected):
765 """Checks whether all selected fields are valid.
767 @type static: L{utils.FieldSet}
768 @param static: static fields set
769 @type dynamic: L{utils.FieldSet}
770 @param dynamic: dynamic fields set
777 delta = f.NonMatching(selected)
779 raise errors.OpPrereqError("Unknown output fields selected: %s"
780 % ",".join(delta), errors.ECODE_INVAL)
783 def _CheckGlobalHvParams(params):
784 """Validates that given hypervisor params are not global ones.
786 This will ensure that instances don't get customised versions of
790 used_globals = constants.HVC_GLOBALS.intersection(params)
792 msg = ("The following hypervisor parameters are global and cannot"
793 " be customized at instance level, please modify them at"
794 " cluster level: %s" % utils.CommaJoin(used_globals))
795 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
798 def _CheckNodeOnline(lu, node, msg=None):
799 """Ensure that a given node is online.
801 @param lu: the LU on behalf of which we make the check
802 @param node: the node to check
803 @param msg: if passed, should be a message to replace the default one
804 @raise errors.OpPrereqError: if the node is offline
808 msg = "Can't use offline node"
809 if lu.cfg.GetNodeInfo(node).offline:
810 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
813 def _CheckNodeNotDrained(lu, node):
814 """Ensure that a given node is not drained.
816 @param lu: the LU on behalf of which we make the check
817 @param node: the node to check
818 @raise errors.OpPrereqError: if the node is drained
821 if lu.cfg.GetNodeInfo(node).drained:
822 raise errors.OpPrereqError("Can't use drained node %s" % node,
826 def _CheckNodeVmCapable(lu, node):
827 """Ensure that a given node is vm capable.
829 @param lu: the LU on behalf of which we make the check
830 @param node: the node to check
831 @raise errors.OpPrereqError: if the node is not vm capable
834 if not lu.cfg.GetNodeInfo(node).vm_capable:
835 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
839 def _CheckNodeHasOS(lu, node, os_name, force_variant):
840 """Ensure that a node supports a given OS.
842 @param lu: the LU on behalf of which we make the check
843 @param node: the node to check
844 @param os_name: the OS to query about
845 @param force_variant: whether to ignore variant errors
846 @raise errors.OpPrereqError: if the node does not support the OS
849 result = lu.rpc.call_os_get(node, os_name)
850 result.Raise("OS '%s' not in supported OS list for node %s" %
852 prereq=True, ecode=errors.ECODE_INVAL)
853 if not force_variant:
854 _CheckOSVariant(result.payload, os_name)
857 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
858 """Ensure that a node has the given secondary ip.
860 @type lu: L{LogicalUnit}
861 @param lu: the LU on behalf of which we make the check
863 @param node: the node to check
864 @type secondary_ip: string
865 @param secondary_ip: the ip to check
866 @type prereq: boolean
867 @param prereq: whether to throw a prerequisite or an execute error
868 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
869 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
872 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
873 result.Raise("Failure checking secondary ip on node %s" % node,
874 prereq=prereq, ecode=errors.ECODE_ENVIRON)
875 if not result.payload:
876 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
877 " please fix and re-run this command" % secondary_ip)
879 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
881 raise errors.OpExecError(msg)
884 def _GetClusterDomainSecret():
885 """Reads the cluster domain secret.
888 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
892 def _CheckInstanceDown(lu, instance, reason):
893 """Ensure that an instance is not running."""
894 if instance.admin_up:
895 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
896 (instance.name, reason), errors.ECODE_STATE)
898 pnode = instance.primary_node
899 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
900 ins_l.Raise("Can't contact node %s for instance information" % pnode,
901 prereq=True, ecode=errors.ECODE_ENVIRON)
903 if instance.name in ins_l.payload:
904 raise errors.OpPrereqError("Instance %s is running, %s" %
905 (instance.name, reason), errors.ECODE_STATE)
908 def _ExpandItemName(fn, name, kind):
909 """Expand an item name.
911 @param fn: the function to use for expansion
912 @param name: requested item name
913 @param kind: text description ('Node' or 'Instance')
914 @return: the resolved (full) name
915 @raise errors.OpPrereqError: if the item is not found
919 if full_name is None:
920 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
925 def _ExpandNodeName(cfg, name):
926 """Wrapper over L{_ExpandItemName} for nodes."""
927 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
930 def _ExpandInstanceName(cfg, name):
931 """Wrapper over L{_ExpandItemName} for instances."""
932 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
935 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
936 memory, vcpus, nics, disk_template, disks,
937 bep, hvp, hypervisor_name, tags):
938 """Builds instance related env variables for hooks
940 This builds the hook environment from individual variables.
943 @param name: the name of the instance
944 @type primary_node: string
945 @param primary_node: the name of the instance's primary node
946 @type secondary_nodes: list
947 @param secondary_nodes: list of secondary nodes as strings
948 @type os_type: string
949 @param os_type: the name of the instance's OS
950 @type status: boolean
951 @param status: the should_run status of the instance
953 @param memory: the memory size of the instance
955 @param vcpus: the count of VCPUs the instance has
957 @param nics: list of tuples (ip, mac, mode, link) representing
958 the NICs the instance has
959 @type disk_template: string
960 @param disk_template: the disk template of the instance
962 @param disks: the list of (size, mode) pairs
964 @param bep: the backend parameters for the instance
966 @param hvp: the hypervisor parameters for the instance
967 @type hypervisor_name: string
968 @param hypervisor_name: the hypervisor for the instance
970 @param tags: list of instance tags as strings
972 @return: the hook environment for this instance
981 "INSTANCE_NAME": name,
982 "INSTANCE_PRIMARY": primary_node,
983 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
984 "INSTANCE_OS_TYPE": os_type,
985 "INSTANCE_STATUS": str_status,
986 "INSTANCE_MEMORY": memory,
987 "INSTANCE_VCPUS": vcpus,
988 "INSTANCE_DISK_TEMPLATE": disk_template,
989 "INSTANCE_HYPERVISOR": hypervisor_name,
993 nic_count = len(nics)
994 for idx, (ip, mac, mode, link) in enumerate(nics):
997 env["INSTANCE_NIC%d_IP" % idx] = ip
998 env["INSTANCE_NIC%d_MAC" % idx] = mac
999 env["INSTANCE_NIC%d_MODE" % idx] = mode
1000 env["INSTANCE_NIC%d_LINK" % idx] = link
1001 if mode == constants.NIC_MODE_BRIDGED:
1002 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1006 env["INSTANCE_NIC_COUNT"] = nic_count
1009 disk_count = len(disks)
1010 for idx, (size, mode) in enumerate(disks):
1011 env["INSTANCE_DISK%d_SIZE" % idx] = size
1012 env["INSTANCE_DISK%d_MODE" % idx] = mode
1016 env["INSTANCE_DISK_COUNT"] = disk_count
1021 env["INSTANCE_TAGS"] = " ".join(tags)
1023 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1024 for key, value in source.items():
1025 env["INSTANCE_%s_%s" % (kind, key)] = value
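# For illustration (assumed values, not derived from a real cluster): with
# bep={"memory": 128} and hvp={"kernel_path": "/boot/vmlinuz"}, the loop above
# adds INSTANCE_BE_memory=128 and INSTANCE_HV_kernel_path=/boot/vmlinuz to the
# environment, next to the INSTANCE_NIC%d_* and INSTANCE_DISK%d_* variables
# built earlier.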
1030 def _NICListToTuple(lu, nics):
1031 """Build a list of nic information tuples.
1033 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1034 value in LUInstanceQueryData.
1036 @type lu: L{LogicalUnit}
1037 @param lu: the logical unit on whose behalf we execute
1038 @type nics: list of L{objects.NIC}
1039 @param nics: list of nics to convert to hooks tuples
1043 cluster = lu.cfg.GetClusterInfo()
1047 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1048 mode = filled_params[constants.NIC_MODE]
1049 link = filled_params[constants.NIC_LINK]
1050 hooks_nics.append((ip, mac, mode, link))
1054 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1055 """Builds instance related env variables for hooks from an object.
1057 @type lu: L{LogicalUnit}
1058 @param lu: the logical unit on whose behalf we execute
1059 @type instance: L{objects.Instance}
1060 @param instance: the instance for which we should build the
1062 @type override: dict
1063 @param override: dictionary with key/values that will override
1066 @return: the hook environment dictionary
1069 cluster = lu.cfg.GetClusterInfo()
1070 bep = cluster.FillBE(instance)
1071 hvp = cluster.FillHV(instance)
1073 "name": instance.name,
1074 "primary_node": instance.primary_node,
1075 "secondary_nodes": instance.secondary_nodes,
1076 "os_type": instance.os,
1077 "status": instance.admin_up,
1078 "memory": bep[constants.BE_MEMORY],
1079 "vcpus": bep[constants.BE_VCPUS],
1080 "nics": _NICListToTuple(lu, instance.nics),
1081 "disk_template": instance.disk_template,
1082 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1085 "hypervisor_name": instance.hypervisor,
1086 "tags": instance.tags,
1089 args.update(override)
1090 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1093 def _AdjustCandidatePool(lu, exceptions):
1094 """Adjust the candidate pool after node operations.
1097 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1099 lu.LogInfo("Promoted nodes to master candidate role: %s",
1100 utils.CommaJoin(node.name for node in mod_list))
1101 for name in mod_list:
1102 lu.context.ReaddNode(name)
1103 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1105 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1109 def _DecideSelfPromotion(lu, exceptions=None):
1110 """Decide whether I should promote myself as a master candidate.
1113 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1114 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1115 # the new node will increase mc_max with one, so:
1116 mc_should = min(mc_should + 1, cp_size)
1117 return mc_now < mc_should
1120 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1121 """Check that the bridges needed by a list of nics exist.
1124 cluster = lu.cfg.GetClusterInfo()
1125 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1126 brlist = [params[constants.NIC_LINK] for params in paramslist
1127 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1129 result = lu.rpc.call_bridges_exist(target_node, brlist)
1130 result.Raise("Error checking bridges on destination node '%s'" %
1131 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1134 def _CheckInstanceBridgesExist(lu, instance, node=None):
1135 """Check that the bridges needed by an instance exist.
1139 node = instance.primary_node
1140 _CheckNicsBridgesExist(lu, instance.nics, node)
1143 def _CheckOSVariant(os_obj, name):
1144 """Check whether an OS name conforms to the os variants specification.
1146 @type os_obj: L{objects.OS}
1147 @param os_obj: OS object to check
1149 @param name: OS name passed by the user, to check for validity
1152 variant = objects.OS.GetVariant(name)
1153 if not os_obj.supported_variants:
1155 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1156 " passed)" % (os_obj.name, variant),
1160 raise errors.OpPrereqError("OS name must include a variant",
1163 if variant not in os_obj.supported_variants:
1164 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1167 def _GetNodeInstancesInner(cfg, fn):
1168 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1171 def _GetNodeInstances(cfg, node_name):
1172 """Returns a list of all primary and secondary instances on a node.
1176 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1179 def _GetNodePrimaryInstances(cfg, node_name):
1180 """Returns primary instances on a node.
1183 return _GetNodeInstancesInner(cfg,
1184 lambda inst: node_name == inst.primary_node)
1187 def _GetNodeSecondaryInstances(cfg, node_name):
1188 """Returns secondary instances on a node.
1191 return _GetNodeInstancesInner(cfg,
1192 lambda inst: node_name in inst.secondary_nodes)
1195 def _GetStorageTypeArgs(cfg, storage_type):
1196 """Returns the arguments for a storage type.
1199 # Special case for file storage
1200 if storage_type == constants.ST_FILE:
1201 # storage.FileStorage wants a list of storage directories
1202 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1207 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1210 for dev in instance.disks:
1211 cfg.SetDiskID(dev, node_name)
1213 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1214 result.Raise("Failed to get disk status from node %s" % node_name,
1215 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1217 for idx, bdev_status in enumerate(result.payload):
1218 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1224 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1225 """Check the sanity of iallocator and node arguments and use the
1226 cluster-wide iallocator if appropriate.
1228 Check that at most one of (iallocator, node) is specified. If none is
1229 specified, then the LU's opcode's iallocator slot is filled with the
1230 cluster-wide default iallocator.
1232 @type iallocator_slot: string
1233 @param iallocator_slot: the name of the opcode iallocator slot
1234 @type node_slot: string
1235 @param node_slot: the name of the opcode target node slot
1238 node = getattr(lu.op, node_slot, None)
1239 iallocator = getattr(lu.op, iallocator_slot, None)
1241 if node is not None and iallocator is not None:
1242 raise errors.OpPrereqError("Do not specify both an iallocator and a node",
1244 elif node is None and iallocator is None:
1245 default_iallocator = lu.cfg.GetDefaultIAllocator()
1246 if default_iallocator:
1247 setattr(lu.op, iallocator_slot, default_iallocator)
1249 raise errors.OpPrereqError("No iallocator or node given and no"
1250 " cluster-wide default iallocator found;"
1251 " please specify either an iallocator or a"
1252 " node, or set a cluster-wide default"
1256 def _GetDefaultIAllocator(cfg, iallocator):
1257 """Decides on which iallocator to use.
1259 @type cfg: L{config.ConfigWriter}
1260 @param cfg: Cluster configuration object
1261 @type iallocator: string or None
1262 @param iallocator: Iallocator specified in opcode
1264 @return: Iallocator name
1268 # Use default iallocator
1269 iallocator = cfg.GetDefaultIAllocator()
1272 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1273 " opcode nor as a cluster-wide default",
1279 class LUClusterPostInit(LogicalUnit):
1280 """Logical unit for running hooks after cluster initialization.
1283 HPATH = "cluster-init"
1284 HTYPE = constants.HTYPE_CLUSTER
1286 def BuildHooksEnv(self):
1291 "OP_TARGET": self.cfg.GetClusterName(),
1294 def BuildHooksNodes(self):
1295 """Build hooks nodes.
1298 return ([], [self.cfg.GetMasterNode()])
1300 def Exec(self, feedback_fn):
1307 class LUClusterDestroy(LogicalUnit):
1308 """Logical unit for destroying the cluster.
1311 HPATH = "cluster-destroy"
1312 HTYPE = constants.HTYPE_CLUSTER
1314 def BuildHooksEnv(self):
1319 "OP_TARGET": self.cfg.GetClusterName(),
1322 def BuildHooksNodes(self):
1323 """Build hooks nodes.
1328 def CheckPrereq(self):
1329 """Check prerequisites.
1331 This checks whether the cluster is empty.
1333 Any errors are signaled by raising errors.OpPrereqError.
1336 master = self.cfg.GetMasterNode()
1338 nodelist = self.cfg.GetNodeList()
1339 if len(nodelist) != 1 or nodelist[0] != master:
1340 raise errors.OpPrereqError("There are still %d node(s) in"
1341 " this cluster." % (len(nodelist) - 1),
1343 instancelist = self.cfg.GetInstanceList()
1345 raise errors.OpPrereqError("There are still %d instance(s) in"
1346 " this cluster." % len(instancelist),
1349 def Exec(self, feedback_fn):
1350 """Destroys the cluster.
1353 master = self.cfg.GetMasterNode()
1355 # Run post hooks on master node before it's removed
1356 _RunPostHook(self, master)
1358 result = self.rpc.call_node_deactivate_master_ip(master)
1359 result.Raise("Could not disable the master role")
1364 def _VerifyCertificate(filename):
1365 """Verifies a certificate for L{LUClusterVerifyConfig}.
1367 @type filename: string
1368 @param filename: Path to PEM file
1372 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1373 utils.ReadFile(filename))
1374 except Exception, err: # pylint: disable=W0703
1375 return (LUClusterVerifyConfig.ETYPE_ERROR,
1376 "Failed to load X509 certificate %s: %s" % (filename, err))
1379 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1380 constants.SSL_CERT_EXPIRATION_ERROR)
1383 fnamemsg = "While verifying %s: %s" % (filename, msg)
1388 return (None, fnamemsg)
1389 elif errcode == utils.CERT_WARNING:
1390 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1391 elif errcode == utils.CERT_ERROR:
1392 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1394 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1397 def _GetAllHypervisorParameters(cluster, instances):
1398 """Compute the set of all hypervisor parameters.
1400 @type cluster: L{objects.Cluster}
1401 @param cluster: the cluster object
1402 @type instances: list of L{objects.Instance}
1403 @param instances: additional instances from which to obtain parameters
1404 @rtype: list of (origin, hypervisor, parameters)
1405 @return: a list with all parameters found, indicating the hypervisor they
1406 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1411 for hv_name in cluster.enabled_hypervisors:
1412 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1414 for os_name, os_hvp in cluster.os_hvp.items():
1415 for hv_name, hv_params in os_hvp.items():
1417 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1418 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1420 # TODO: collapse identical parameter values in a single one
1421 for instance in instances:
1422 if instance.hvparams:
1423 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1424 cluster.FillHV(instance)))
1429 class _VerifyErrors(object):
1430 """Mix-in for cluster/group verify LUs.
1432 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1433 self.op and self._feedback_fn to be available.)
1436 TCLUSTER = "cluster"
1438 TINSTANCE = "instance"
1440 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1441 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1442 ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1443 ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1444 ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1445 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1446 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1447 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1448 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1449 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1450 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1451 EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1452 ENODEDRBD = (TNODE, "ENODEDRBD")
1453 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1454 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1455 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1456 ENODEHV = (TNODE, "ENODEHV")
1457 ENODELVM = (TNODE, "ENODELVM")
1458 ENODEN1 = (TNODE, "ENODEN1")
1459 ENODENET = (TNODE, "ENODENET")
1460 ENODEOS = (TNODE, "ENODEOS")
1461 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1462 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1463 ENODERPC = (TNODE, "ENODERPC")
1464 ENODESSH = (TNODE, "ENODESSH")
1465 ENODEVERSION = (TNODE, "ENODEVERSION")
1466 ENODESETUP = (TNODE, "ENODESETUP")
1467 ENODETIME = (TNODE, "ENODETIME")
1468 ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1470 ETYPE_FIELD = "code"
1471 ETYPE_ERROR = "ERROR"
1472 ETYPE_WARNING = "WARNING"
1474 def _Error(self, ecode, item, msg, *args, **kwargs):
1475 """Format an error message.
1477 Based on the opcode's error_codes parameter, either format a
1478 parseable error code, or a simpler error string.
1480 This must be called only from Exec and functions called from Exec.
1483 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1485 # first complete the msg
1488 # then format the whole message
1489 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1490 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1496 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1497 # and finally report it via the feedback_fn
1498 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1500 def _ErrorIf(self, cond, *args, **kwargs):
1501 """Log an error message if the passed condition is True.
1505 or self.op.debug_simulate_errors) # pylint: disable=E1101
1507 self._Error(*args, **kwargs)
1508 # do not mark the operation as failed for WARN cases only
1509 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1510 self.bad = self.bad or cond
1513 class LUClusterVerify(NoHooksLU):
1514 """Submits all jobs necessary to verify the cluster.
1519 def ExpandNames(self):
1520 self.needed_locks = {}
1522 def Exec(self, feedback_fn):
1525 if self.op.group_name:
1526 groups = [self.op.group_name]
1527 depends_fn = lambda: None
1529 groups = self.cfg.GetNodeGroupList()
1531 # Verify global configuration
1532 jobs.append([opcodes.OpClusterVerifyConfig()])
1534 # Always depend on global verification
1535 depends_fn = lambda: [(-len(jobs), [])]
1537 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1538 depends=depends_fn())]
1539 for group in groups)
1541 # Fix up all parameters
1542 for op in itertools.chain(*jobs): # pylint: disable=W0142
1543 op.debug_simulate_errors = self.op.debug_simulate_errors
1544 op.verbose = self.op.verbose
1545 op.error_codes = self.op.error_codes
1547 op.skip_checks = self.op.skip_checks
1548 except AttributeError:
1549 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1551 return ResultWithJobs(jobs)
1554 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1555 """Verifies the cluster config.
1560 def _VerifyHVP(self, hvp_data):
1561 """Verifies locally the syntax of the hypervisor parameters.
1564 for item, hv_name, hv_params in hvp_data:
1565 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1568 hv_class = hypervisor.GetHypervisor(hv_name)
1569 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1570 hv_class.CheckParameterSyntax(hv_params)
1571 except errors.GenericError, err:
1572 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1574 def ExpandNames(self):
1575 # Information can be safely retrieved as the BGL is acquired in exclusive
1577 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1578 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1579 self.all_node_info = self.cfg.GetAllNodesInfo()
1580 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1581 self.needed_locks = {}
1583 def Exec(self, feedback_fn):
1584 """Verify integrity of cluster, performing various tests on nodes.
1588 self._feedback_fn = feedback_fn
1590 feedback_fn("* Verifying cluster config")
1592 for msg in self.cfg.VerifyConfig():
1593 self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1595 feedback_fn("* Verifying cluster certificate files")
1597 for cert_filename in constants.ALL_CERT_FILES:
1598 (errcode, msg) = _VerifyCertificate(cert_filename)
1599 self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1601 feedback_fn("* Verifying hypervisor parameters")
1603 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1604 self.all_inst_info.values()))
1606 feedback_fn("* Verifying all nodes belong to an existing group")
1608 # We do this verification here because, should this bogus circumstance
1609 # occur, it would never be caught by VerifyGroup, which only acts on
1610 # nodes/instances reachable from existing node groups.
1612 dangling_nodes = set(node.name for node in self.all_node_info.values()
1613 if node.group not in self.all_group_info)
1615 dangling_instances = {}
1616 no_node_instances = []
1618 for inst in self.all_inst_info.values():
1619 if inst.primary_node in dangling_nodes:
1620 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1621 elif inst.primary_node not in self.all_node_info:
1622 no_node_instances.append(inst.name)
1627 utils.CommaJoin(dangling_instances.get(node.name,
1629 for node in dangling_nodes]
1631 self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1632 "the following nodes (and their instances) belong to a"
1633 " non-existing group: %s", utils.CommaJoin(pretty_dangling))
1635 self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1636 "the following instances have a non-existing primary-node:"
1637 " %s", utils.CommaJoin(no_node_instances))
1642 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1643 """Verifies the status of a node group.
1646 HPATH = "cluster-verify"
1647 HTYPE = constants.HTYPE_CLUSTER
1650 _HOOKS_INDENT_RE = re.compile("^", re.M)
1652 class NodeImage(object):
1653 """A class representing the logical and physical status of a node.
1656 @ivar name: the node name to which this object refers
1657 @ivar volumes: a structure as returned from
1658 L{ganeti.backend.GetVolumeList} (runtime)
1659 @ivar instances: a list of running instances (runtime)
1660 @ivar pinst: list of configured primary instances (config)
1661 @ivar sinst: list of configured secondary instances (config)
1662 @ivar sbp: dictionary of {primary-node: list of instances} for all
1663 instances for which this node is secondary (config)
1664 @ivar mfree: free memory, as reported by hypervisor (runtime)
1665 @ivar dfree: free disk, as reported by the node (runtime)
1666 @ivar offline: the offline status (config)
1667 @type rpc_fail: boolean
1668 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1669 not whether the individual keys were correct) (runtime)
1670 @type lvm_fail: boolean
1671 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1672 @type hyp_fail: boolean
1673 @ivar hyp_fail: whether the RPC call didn't return the instance list
1674 @type ghost: boolean
1675 @ivar ghost: whether this is a known node or not (config)
1676 @type os_fail: boolean
1677 @ivar os_fail: whether the RPC call didn't return valid OS data
1679 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1680 @type vm_capable: boolean
1681 @ivar vm_capable: whether the node can host instances
1684 def __init__(self, offline=False, name=None, vm_capable=True):
1693 self.offline = offline
1694 self.vm_capable = vm_capable
1695 self.rpc_fail = False
1696 self.lvm_fail = False
1697 self.hyp_fail = False
1699 self.os_fail = False
1702 def ExpandNames(self):
1703 # This raises errors.OpPrereqError on its own:
1704 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1706 # Get instances in node group; this is unsafe and needs verification later
1707 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1709 self.needed_locks = {
1710 locking.LEVEL_INSTANCE: inst_names,
1711 locking.LEVEL_NODEGROUP: [self.group_uuid],
1712 locking.LEVEL_NODE: [],
1715 self.share_locks = _ShareAll()
1717 def DeclareLocks(self, level):
1718 if level == locking.LEVEL_NODE:
1719 # Get members of node group; this is unsafe and needs verification later
1720 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1722 all_inst_info = self.cfg.GetAllInstancesInfo()
1724 # In Exec(), we warn about mirrored instances that have primary and
1725 # secondary living in separate node groups. To fully verify that
1726 # volumes for these instances are healthy, we will need to do an
1727 # extra call to their secondaries. We ensure here those nodes will
1729 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1730 # Important: access only the instances whose lock is owned
1731 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1732 nodes.update(all_inst_info[inst].secondary_nodes)
1734 self.needed_locks[locking.LEVEL_NODE] = nodes
1736 def CheckPrereq(self):
1737 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1738 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1740 group_nodes = set(self.group_info.members)
1741 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1744 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1746 unlocked_instances = \
1747 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1750 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1751 utils.CommaJoin(unlocked_nodes))
1753 if unlocked_instances:
1754 raise errors.OpPrereqError("Missing lock for instances: %s" %
1755 utils.CommaJoin(unlocked_instances))
1757 self.all_node_info = self.cfg.GetAllNodesInfo()
1758 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1760 self.my_node_names = utils.NiceSort(group_nodes)
1761 self.my_inst_names = utils.NiceSort(group_instances)
1763 self.my_node_info = dict((name, self.all_node_info[name])
1764 for name in self.my_node_names)
1766 self.my_inst_info = dict((name, self.all_inst_info[name])
1767 for name in self.my_inst_names)
1769 # We detect here the nodes that will need the extra RPC calls for verifying
1770 # split LV volumes; they should be locked.
1771 extra_lv_nodes = set()
1773 for inst in self.my_inst_info.values():
1774 if inst.disk_template in constants.DTS_INT_MIRROR:
1775 group = self.my_node_info[inst.primary_node].group
1776 for nname in inst.secondary_nodes:
1777 if self.all_node_info[nname].group != group:
1778 extra_lv_nodes.add(nname)
1780 unlocked_lv_nodes = \
1781 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1783 if unlocked_lv_nodes:
1784 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
1785 utils.CommaJoin(unlocked_lv_nodes))
1786 self.extra_lv_nodes = list(extra_lv_nodes)
1788 def _VerifyNode(self, ninfo, nresult):
1789 """Perform some basic validation on data returned from a node.
1791 - check the result data structure is well formed and has all the
1793 - check ganeti version
1795 @type ninfo: L{objects.Node}
1796 @param ninfo: the node to check
1797 @param nresult: the results from the node
1799 @return: whether overall this call was successful (and we can expect
1800 reasonable values in the response)
1804 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1806 # main result, nresult should be a non-empty dict
1807 test = not nresult or not isinstance(nresult, dict)
1808 _ErrorIf(test, self.ENODERPC, node,
1809 "unable to verify node: no data returned")
1813 # compares ganeti version
1814 local_version = constants.PROTOCOL_VERSION
1815 remote_version = nresult.get("version", None)
1816 test = not (remote_version and
1817 isinstance(remote_version, (list, tuple)) and
1818 len(remote_version) == 2)
1819 _ErrorIf(test, self.ENODERPC, node,
1820 "connection to node returned invalid data")
1824 test = local_version != remote_version[0]
1825 _ErrorIf(test, self.ENODEVERSION, node,
1826 "incompatible protocol versions: master %s,"
1827 " node %s", local_version, remote_version[0])
1831 # node seems compatible, we can actually try to look into its results
1833 # full package version
1834 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1835 self.ENODEVERSION, node,
1836 "software version mismatch: master %s, node %s",
1837 constants.RELEASE_VERSION, remote_version[1],
1838 code=self.ETYPE_WARNING)
1840 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1841 if ninfo.vm_capable and isinstance(hyp_result, dict):
1842 for hv_name, hv_result in hyp_result.iteritems():
1843 test = hv_result is not None
1844 _ErrorIf(test, self.ENODEHV, node,
1845 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1847 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1848 if ninfo.vm_capable and isinstance(hvp_result, list):
1849 for item, hv_name, hv_result in hvp_result:
1850 _ErrorIf(True, self.ENODEHV, node,
1851 "hypervisor %s parameter verify failure (source %s): %s",
1852 hv_name, item, hv_result)
1854 test = nresult.get(constants.NV_NODESETUP,
1855 ["Missing NODESETUP results"])
1856 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1861 def _VerifyNodeTime(self, ninfo, nresult,
1862 nvinfo_starttime, nvinfo_endtime):
1863 """Check the node time.
1865 @type ninfo: L{objects.Node}
1866 @param ninfo: the node to check
1867 @param nresult: the remote results for the node
1868 @param nvinfo_starttime: the start time of the RPC call
1869 @param nvinfo_endtime: the end time of the RPC call
1873 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1875 ntime = nresult.get(constants.NV_TIME, None)
1877 ntime_merged = utils.MergeTime(ntime)
1878 except (ValueError, TypeError):
1879 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1882 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1883 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1884 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1885 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1889 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1890 "Node time diverges by at least %s from master node time",
1893 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1894 """Check the node LVM results.
1896 @type ninfo: L{objects.Node}
1897 @param ninfo: the node to check
1898 @param nresult: the remote results for the node
1899 @param vg_name: the configured VG name
1906 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1908 # checks vg existence and size > 20G
1909 vglist = nresult.get(constants.NV_VGLIST, None)
1911 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1913 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1914 constants.MIN_VG_SIZE)
1915 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1918 pvlist = nresult.get(constants.NV_PVLIST, None)
1919 test = pvlist is None
1920 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1922 # check that ':' is not present in PV names, since it's a
1923 # special character for lvcreate (denotes the range of PEs to
1925 for _, pvname, owner_vg in pvlist:
1926 test = ":" in pvname
1927 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1928 " '%s' of VG '%s'", pvname, owner_vg)
1930 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1931 """Check the node bridges.
1933 @type ninfo: L{objects.Node}
1934 @param ninfo: the node to check
1935 @param nresult: the remote results for the node
1936 @param bridges: the expected list of bridges
1943 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1945 missing = nresult.get(constants.NV_BRIDGES, None)
1946 test = not isinstance(missing, list)
1947 _ErrorIf(test, self.ENODENET, node,
1948 "did not return valid bridge information")
1950 _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1951 utils.CommaJoin(sorted(missing)))
1953 def _VerifyNodeNetwork(self, ninfo, nresult):
1954 """Check the node network connectivity results.
1956 @type ninfo: L{objects.Node}
1957 @param ninfo: the node to check
1958 @param nresult: the remote results for the node
1962 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1964 test = constants.NV_NODELIST not in nresult
1965 _ErrorIf(test, self.ENODESSH, node,
1966 "node hasn't returned node ssh connectivity data")
1968 if nresult[constants.NV_NODELIST]:
1969 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1970 _ErrorIf(True, self.ENODESSH, node,
1971 "ssh communication with node '%s': %s", a_node, a_msg)
1973 test = constants.NV_NODENETTEST not in nresult
1974 _ErrorIf(test, self.ENODENET, node,
1975 "node hasn't returned node tcp connectivity data")
1977 if nresult[constants.NV_NODENETTEST]:
1978 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1980 _ErrorIf(True, self.ENODENET, node,
1981 "tcp communication with node '%s': %s",
1982 anode, nresult[constants.NV_NODENETTEST][anode])
1984 test = constants.NV_MASTERIP not in nresult
1985 _ErrorIf(test, self.ENODENET, node,
1986 "node hasn't returned node master IP reachability data")
1988 if not nresult[constants.NV_MASTERIP]:
1989 if node == self.master_node:
1990 msg = "the master node cannot reach the master IP (not configured?)"
1992 msg = "cannot reach the master IP"
1993 _ErrorIf(True, self.ENODENET, node, msg)
1995 def _VerifyInstance(self, instance, instanceconfig, node_image,
1997 """Verify an instance.
1999 This function checks whether the required block devices are
2000 available on the instance's nodes.
2003 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2004 node_current = instanceconfig.primary_node
2006 node_vol_should = {}
2007 instanceconfig.MapLVsByNode(node_vol_should)
2009 for node in node_vol_should:
2010 n_img = node_image[node]
2011 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2012 # ignore missing volumes on offline or broken nodes
2014 for volume in node_vol_should[node]:
2015 test = volume not in n_img.volumes
2016 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
2017 "volume %s missing on node %s", volume, node)
2019 if instanceconfig.admin_up:
2020 pri_img = node_image[node_current]
2021 test = instance not in pri_img.instances and not pri_img.offline
2022 _ErrorIf(test, self.EINSTANCEDOWN, instance,
2023 "instance not running on its primary node %s",
2026 diskdata = [(nname, success, status, idx)
2027 for (nname, disks) in diskstatus.items()
2028 for idx, (success, status) in enumerate(disks)]
2030 for nname, success, bdev_status, idx in diskdata:
2031 # the 'ghost node' construction in Exec() ensures that we have a
2033 snode = node_image[nname]
2034 bad_snode = snode.ghost or snode.offline
2035 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2036 self.EINSTANCEFAULTYDISK, instance,
2037 "couldn't retrieve status for disk/%s on %s: %s",
2038 idx, nname, bdev_status)
2039 _ErrorIf((instanceconfig.admin_up and success and
2040 bdev_status.ldisk_status == constants.LDS_FAULTY),
2041 self.EINSTANCEFAULTYDISK, instance,
2042 "disk/%s on %s is faulty", idx, nname)
2044 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2045 """Verify if there are any unknown volumes in the cluster.
2047 The .os, .swap and backup volumes are ignored. All other volumes are
2048 reported as unknown.
2050 @type reserved: L{ganeti.utils.FieldSet}
2051 @param reserved: a FieldSet of reserved volume names
2054 for node, n_img in node_image.items():
2055 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2056 # skip non-healthy nodes
2058 for volume in n_img.volumes:
2059 test = ((node not in node_vol_should or
2060 volume not in node_vol_should[node]) and
2061 not reserved.Matches(volume))
2062 self._ErrorIf(test, self.ENODEORPHANLV, node,
2063 "volume %s is unknown", volume)
2065 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2066 """Verify N+1 Memory Resilience.
2068 Check that if one single node dies we can still start all the
2069 instances it was primary for.
2072 cluster_info = self.cfg.GetClusterInfo()
2073 for node, n_img in node_image.items():
2074 # This code checks that every node which is now listed as
2075 # secondary has enough memory to host all instances it is
2076 # supposed to, should a single other node in the cluster fail.
2077 # FIXME: not ready for failover to an arbitrary node
2078 # FIXME: does not support file-backed instances
2079 # WARNING: we currently take into account down instances as well
2080 # as up ones, considering that even if they're down someone
2081 # might want to start them even in the event of a node failure.
2083 # we're skipping offline nodes from the N+1 warning, since
2084 # most likely we don't have good memory information from them;
2085 # we already list instances living on such nodes, and that's
2088 for prinode, instances in n_img.sbp.items():
2090 for instance in instances:
2091 bep = cluster_info.FillBE(instance_cfg[instance])
2092 if bep[constants.BE_AUTO_BALANCE]:
2093 needed_mem += bep[constants.BE_MEMORY]
2094 test = n_img.mfree < needed_mem
2095 self._ErrorIf(test, self.ENODEN1, node,
2096 "not enough memory to accommodate instance failovers"
2097 " should node %s fail (%dMiB needed, %dMiB available)",
2098 prinode, needed_mem, n_img.mfree)
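  # Illustrative sketch, not part of the LU (hypothetical numbers): if this
  # node is secondary for three auto-balanced instances whose primary is node
  # B, with BE_MEMORY of 512, 1024 and 2048 MiB, then needed_mem for prinode B
  # is 3584 MiB; an mfree value below that triggers the ENODEN1 warning above.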
2101 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2102 (files_all, files_all_opt, files_mc, files_vm)):
2103 """Verifies file checksums collected from all nodes.
2105 @param errorif: Callback for reporting errors
2106 @param nodeinfo: List of L{objects.Node} objects
2107 @param master_node: Name of master node
2108 @param all_nvinfo: RPC results
2111 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2112 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2113 "Found file listed in more than one file list"
2115 # Define functions determining which nodes to consider for a file
2118 (files_all_opt, None),
2119 (files_mc, lambda node: (node.master_candidate or
2120 node.name == master_node)),
2121 (files_vm, lambda node: node.vm_capable),
2124 # Build mapping from filename to list of nodes which should have the file
2126 for (files, fn) in files2nodefn:
2128 filenodes = nodeinfo
2130 filenodes = filter(fn, nodeinfo)
2131 nodefiles.update((filename,
2132 frozenset(map(operator.attrgetter("name"), filenodes)))
2133 for filename in files)
2135 assert set(nodefiles) == (files_all | files_all_opt | files_mc | files_vm)
2137 fileinfo = dict((filename, {}) for filename in nodefiles)
2138 ignore_nodes = set()
2140 for node in nodeinfo:
2142 ignore_nodes.add(node.name)
2145 nresult = all_nvinfo[node.name]
2147 if nresult.fail_msg or not nresult.payload:
2150 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2152 test = not (node_files and isinstance(node_files, dict))
2153 errorif(test, cls.ENODEFILECHECK, node.name,
2154 "Node did not return file checksum data")
2156 ignore_nodes.add(node.name)
2159 # Build per-checksum mapping from filename to nodes having it
2160 for (filename, checksum) in node_files.items():
2161 assert filename in nodefiles
2162 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2164 for (filename, checksums) in fileinfo.items():
2165 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2167 # Nodes having the file
2168 with_file = frozenset(node_name
2169 for nodes in fileinfo[filename].values()
2170 for node_name in nodes) - ignore_nodes
2172 expected_nodes = nodefiles[filename] - ignore_nodes
2174 # Nodes missing file
2175 missing_file = expected_nodes - with_file
2177 if filename in files_all_opt:
2179 errorif(missing_file and missing_file != expected_nodes,
2180 cls.ECLUSTERFILECHECK, None,
2181 "File %s is optional, but it must exist on all or no"
2182 " nodes (not found on %s)",
2183 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2185 # Non-optional files
2186 errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2187 "File %s is missing from node(s) %s", filename,
2188 utils.CommaJoin(utils.NiceSort(missing_file)))
2190 # Warn if a node has a file it shouldn't
2191 unexpected = with_file - expected_nodes
2193 cls.ECLUSTERFILECHECK, None,
2194 "File %s should not exist on node(s) %s",
2195 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2197 # See if there are multiple versions of the file
2198 test = len(checksums) > 1
2200 variants = ["variant %s on %s" %
2201 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2202 for (idx, (checksum, nodes)) in
2203 enumerate(sorted(checksums.items()))]
2207 errorif(test, cls.ECLUSTERFILECHECK, None,
2208 "File %s found with %s different checksums (%s)",
2209 filename, len(checksums), "; ".join(variants))
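  # Illustrative sketch, not part of the LU (hypothetical names and checksums):
  # fileinfo maps each filename to a dict of checksum -> set of node names, so
  # a file that differs between nodes would look like
  #
  #   fileinfo["/etc/hosts"] == {
  #     "0db75e03ab9f...": set(["node1", "node2"]),
  #     "9f86d0818856...": set(["node3"]),
  #   }
  #
  # and len(checksums) > 1 above then reports the per-variant node lists.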
2211 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2213 """Verifies the node DRBD status.
2215 @type ninfo: L{objects.Node}
2216 @param ninfo: the node to check
2217 @param nresult: the remote results for the node
2218 @param instanceinfo: the dict of instances
2219 @param drbd_helper: the configured DRBD usermode helper
2220 @param drbd_map: the DRBD map as returned by
2221 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2225 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2228 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2229 test = (helper_result is None)
2230 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2231 "no drbd usermode helper returned")
2233 status, payload = helper_result
2235 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2236 "drbd usermode helper check unsuccessful: %s", payload)
2237 test = status and (payload != drbd_helper)
2238 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2239 "wrong drbd usermode helper: %s", payload)
2241 # compute the DRBD minors
2243 for minor, instance in drbd_map[node].items():
2244 test = instance not in instanceinfo
2245 _ErrorIf(test, self.ECLUSTERCFG, None,
2246 "ghost instance '%s' in temporary DRBD map", instance)
2247 # ghost instance should not be running, but otherwise we
2248 # don't give double warnings (both ghost instance and
2249 # unallocated minor in use)
2251 node_drbd[minor] = (instance, False)
2253 instance = instanceinfo[instance]
2254 node_drbd[minor] = (instance.name, instance.admin_up)
2256 # and now check them
2257 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2258 test = not isinstance(used_minors, (tuple, list))
2259 _ErrorIf(test, self.ENODEDRBD, node,
2260 "cannot parse drbd status file: %s", str(used_minors))
2262 # we cannot check drbd status
2265 for minor, (iname, must_exist) in node_drbd.items():
2266 test = minor not in used_minors and must_exist
2267 _ErrorIf(test, self.ENODEDRBD, node,
2268 "drbd minor %d of instance %s is not active", minor, iname)
2269 for minor in used_minors:
2270 test = minor not in node_drbd
2271 _ErrorIf(test, self.ENODEDRBD, node,
2272 "unallocated drbd minor %d is in use", minor)
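  # Illustrative sketch, not part of the LU (hypothetical names): node_drbd
  # maps each configured minor to (instance_name, must_exist) and is compared
  # against the minors the node actually reports, e.g.
  #
  #   node_drbd = {0: ("instance1.example.com", True),   # must be active
  #                1: ("ghost-instance", False)}         # may be inactive
  #   used_minors = [0, 2]
  #   # minor 1 missing is fine, minor 2 is flagged as an unallocated minor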
2274 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2275 """Builds the node OS structures.
2277 @type ninfo: L{objects.Node}
2278 @param ninfo: the node to check
2279 @param nresult: the remote results for the node
2280 @param nimg: the node image object
2284 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2286 remote_os = nresult.get(constants.NV_OSLIST, None)
2287 test = (not isinstance(remote_os, list) or
2288 not compat.all(isinstance(v, list) and len(v) == 7
2289 for v in remote_os))
2291 _ErrorIf(test, self.ENODEOS, node,
2292 "node hasn't returned valid OS data")
2301 for (name, os_path, status, diagnose,
2302 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2304 if name not in os_dict:
2307 # parameters is a list of lists instead of list of tuples due to
2308 # JSON lacking a real tuple type, fix it:
2309 parameters = [tuple(v) for v in parameters]
2310 os_dict[name].append((os_path, status, diagnose,
2311 set(variants), set(parameters), set(api_ver)))
2313 nimg.oslist = os_dict
2315 def _VerifyNodeOS(self, ninfo, nimg, base):
2316 """Verifies the node OS list.
2318 @type ninfo: L{objects.Node}
2319 @param ninfo: the node to check
2320 @param nimg: the node image object
2321 @param base: the 'template' node we match against (e.g. from the master)
2325 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2327 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2329 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2330 for os_name, os_data in nimg.oslist.items():
2331 assert os_data, "Empty OS status for OS %s?!" % os_name
2332 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2333 _ErrorIf(not f_status, self.ENODEOS, node,
2334 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2335 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2336 "OS '%s' has multiple entries (first one shadows the rest): %s",
2337 os_name, utils.CommaJoin([v[0] for v in os_data]))
2338 # comparisons with the 'base' image
2339 test = os_name not in base.oslist
2340 _ErrorIf(test, self.ENODEOS, node,
2341 "Extra OS %s not present on reference node (%s)",
2345 assert base.oslist[os_name], "Base node has empty OS status?"
2346 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2348 # base OS is invalid, skipping
2350 for kind, a, b in [("API version", f_api, b_api),
2351 ("variants list", f_var, b_var),
2352 ("parameters", beautify_params(f_param),
2353 beautify_params(b_param))]:
2354 _ErrorIf(a != b, self.ENODEOS, node,
2355 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2356 kind, os_name, base.name,
2357 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2359 # check any missing OSes
2360 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2361 _ErrorIf(missing, self.ENODEOS, node,
2362 "OSes present on reference node %s but missing on this node: %s",
2363 base.name, utils.CommaJoin(missing))
2365 def _VerifyOob(self, ninfo, nresult):
2366 """Verifies out of band functionality of a node.
2368 @type ninfo: L{objects.Node}
2369 @param ninfo: the node to check
2370 @param nresult: the remote results for the node
2374 # We just have to verify the paths on master and/or master candidates
2375 # as the oob helper is invoked on the master
2376 if ((ninfo.master_candidate or ninfo.master_capable) and
2377 constants.NV_OOB_PATHS in nresult):
2378 for path_result in nresult[constants.NV_OOB_PATHS]:
2379 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2381 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2382 """Verifies and updates the node volume data.
2384 This function will update a L{NodeImage}'s internal structures
2385 with data from the remote call.
2387 @type ninfo: L{objects.Node}
2388 @param ninfo: the node to check
2389 @param nresult: the remote results for the node
2390 @param nimg: the node image object
2391 @param vg_name: the configured VG name
2395 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2397 nimg.lvm_fail = True
2398 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2401 elif isinstance(lvdata, basestring):
2402 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2403 utils.SafeEncode(lvdata))
2404 elif not isinstance(lvdata, dict):
2405 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2407 nimg.volumes = lvdata
2408 nimg.lvm_fail = False
2410 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2411 """Verifies and updates the node instance list.
2413 If the listing was successful, then updates this node's instance
2414 list. Otherwise, it marks the RPC call as failed for the instance
2417 @type ninfo: L{objects.Node}
2418 @param ninfo: the node to check
2419 @param nresult: the remote results for the node
2420 @param nimg: the node image object
2423 idata = nresult.get(constants.NV_INSTANCELIST, None)
2424 test = not isinstance(idata, list)
2425 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2426 " (instancelist): %s", utils.SafeEncode(str(idata)))
2428 nimg.hyp_fail = True
2430 nimg.instances = idata
2432 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2433 """Verifies and computes a node information map
2435 @type ninfo: L{objects.Node}
2436 @param ninfo: the node to check
2437 @param nresult: the remote results for the node
2438 @param nimg: the node image object
2439 @param vg_name: the configured VG name
2443 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2445 # try to read free memory (from the hypervisor)
2446 hv_info = nresult.get(constants.NV_HVINFO, None)
2447 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2448 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2451 nimg.mfree = int(hv_info["memory_free"])
2452 except (ValueError, TypeError):
2453 _ErrorIf(True, self.ENODERPC, node,
2454 "node returned invalid nodeinfo, check hypervisor")
2456 # FIXME: devise a free space model for file based instances as well
2457 if vg_name is not None:
2458 test = (constants.NV_VGLIST not in nresult or
2459 vg_name not in nresult[constants.NV_VGLIST])
2460 _ErrorIf(test, self.ENODELVM, node,
2461 "node didn't return data for the volume group '%s'"
2462 " - it is either missing or broken", vg_name)
2465 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2466 except (ValueError, TypeError):
2467 _ErrorIf(True, self.ENODERPC, node,
2468 "node returned invalid LVM info, check LVM status")
2470 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2471 """Gets per-disk status information for all instances.
2473 @type nodelist: list of strings
2474 @param nodelist: Node names
2475 @type node_image: dict of (name, L{objects.Node})
2476 @param node_image: Node objects
2477 @type instanceinfo: dict of (name, L{objects.Instance})
2478 @param instanceinfo: Instance objects
2479 @rtype: {instance: {node: [(success, payload)]}}
2480 @return: a dictionary of per-instance dictionaries with nodes as
2481 keys and disk information as values; the disk information is a
2482 list of tuples (success, payload)
2485 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2488 node_disks_devonly = {}
2489 diskless_instances = set()
2490 diskless = constants.DT_DISKLESS
2492 for nname in nodelist:
2493 node_instances = list(itertools.chain(node_image[nname].pinst,
2494 node_image[nname].sinst))
2495 diskless_instances.update(inst for inst in node_instances
2496 if instanceinfo[inst].disk_template == diskless)
2497 disks = [(inst, disk)
2498 for inst in node_instances
2499 for disk in instanceinfo[inst].disks]
2502 # No need to collect data
2505 node_disks[nname] = disks
2507 # Creating copies as SetDiskID below will modify the objects and that can
2508 # lead to incorrect data returned from nodes
2509 devonly = [dev.Copy() for (_, dev) in disks]
2512 self.cfg.SetDiskID(dev, nname)
2514 node_disks_devonly[nname] = devonly
2516 assert len(node_disks) == len(node_disks_devonly)
2518 # Collect data from all nodes with disks
2519 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2522 assert len(result) == len(node_disks)
2526 for (nname, nres) in result.items():
2527 disks = node_disks[nname]
2530 # No data from this node
2531 data = len(disks) * [(False, "node offline")]
2534 _ErrorIf(msg, self.ENODERPC, nname,
2535 "while getting disk information: %s", msg)
2537 # No data from this node
2538 data = len(disks) * [(False, msg)]
2541 for idx, i in enumerate(nres.payload):
2542 if isinstance(i, (tuple, list)) and len(i) == 2:
2545 logging.warning("Invalid result from node %s, entry %d: %s",
2547 data.append((False, "Invalid result from the remote node"))
2549 for ((inst, _), status) in zip(disks, data):
2550 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2552 # Add empty entries for diskless instances.
2553 for inst in diskless_instances:
2554 assert inst not in instdisk
2557 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2558 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2559 compat.all(isinstance(s, (tuple, list)) and
2560 len(s) == 2 for s in statuses)
2561 for inst, nnames in instdisk.items()
2562 for nname, statuses in nnames.items())
2563 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
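  # Illustrative sketch, not part of the LU (hypothetical names): the
  # resulting instdisk structure is keyed by instance, then by node, with one
  # (success, payload) tuple per disk, matching the @rtype above:
  #
  #   instdisk == {
  #     "inst1.example.com": {
  #       "node1.example.com": [(True, st0), (True, st1)],
  #       "node2.example.com": [(False, "node offline"),
  #                             (False, "node offline")],
  #     },
  #   }
  #   # st0/st1 stand for the per-disk payload objects returned by the node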
2568 def _SshNodeSelector(group_uuid, all_nodes):
2569 """Create endless iterators for all potential SSH check hosts.
2572 nodes = [node for node in all_nodes
2573 if (node.group != group_uuid and
2575 keyfunc = operator.attrgetter("group")
2577 return map(itertools.cycle,
2578 [sorted(map(operator.attrgetter("name"), names))
2579 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2583 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2584 """Choose which nodes should talk to which other nodes.
2586 We will make nodes contact all nodes in their group, and one node from every other node group.
2589 @warning: This algorithm has a known issue if one node group is much
2590 smaller than others (e.g. just one node). In such a case all other
2591 nodes will talk to the single node.
2594 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2595 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2597 return (online_nodes,
2598 dict((name, sorted([i.next() for i in sel]))
2599 for name in online_nodes))
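  # Illustrative sketch, not part of the LU (hypothetical names):
  # _SshNodeSelector builds one endless, sorted iterator per foreign node
  # group, and _SelectSshCheckNodes draws one peer from each iterator for
  # every online node in this group, so consecutive nodes get consecutive
  # remote peers:
  #
  #   other = itertools.cycle(sorted(["g2-a", "g2-b"]))
  #   peers = dict((name, [other.next()]) for name in ["n1", "n2", "n3"])
  #   # {"n1": ["g2-a"], "n2": ["g2-b"], "n3": ["g2-a"]}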
2601 def BuildHooksEnv(self):
2604 Cluster-Verify hooks are run only in the post phase; if they fail, their
2605 output is logged in the verify output and the verification fails.
2609 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2612 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2613 for node in self.my_node_info.values())
2617 def BuildHooksNodes(self):
2618 """Build hooks nodes.
2621 return ([], self.my_node_names)
2623 def Exec(self, feedback_fn):
2624 """Verify integrity of the node group, performing various tests on nodes.
2627 # This method has too many local variables. pylint: disable=R0914
2628 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2630 if not self.my_node_names:
2632 feedback_fn("* Empty node group, skipping verification")
2636 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2637 verbose = self.op.verbose
2638 self._feedback_fn = feedback_fn
2640 vg_name = self.cfg.GetVGName()
2641 drbd_helper = self.cfg.GetDRBDHelper()
2642 cluster = self.cfg.GetClusterInfo()
2643 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2644 hypervisors = cluster.enabled_hypervisors
2645 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2647 i_non_redundant = [] # Non redundant instances
2648 i_non_a_balanced = [] # Non auto-balanced instances
2649 n_offline = 0 # Count of offline nodes
2650 n_drained = 0 # Count of nodes being drained
2651 node_vol_should = {}
2653 # FIXME: verify OS list
2656 filemap = _ComputeAncillaryFiles(cluster, False)
2658 # do local checksums
2659 master_node = self.master_node = self.cfg.GetMasterNode()
2660 master_ip = self.cfg.GetMasterIP()
2662 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2664 node_verify_param = {
2665 constants.NV_FILELIST:
2666 utils.UniqueSequence(filename
2667 for files in filemap
2668 for filename in files),
2669 constants.NV_NODELIST:
2670 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2671 self.all_node_info.values()),
2672 constants.NV_HYPERVISOR: hypervisors,
2673 constants.NV_HVPARAMS:
2674 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2675 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2676 for node in node_data_list
2677 if not node.offline],
2678 constants.NV_INSTANCELIST: hypervisors,
2679 constants.NV_VERSION: None,
2680 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2681 constants.NV_NODESETUP: None,
2682 constants.NV_TIME: None,
2683 constants.NV_MASTERIP: (master_node, master_ip),
2684 constants.NV_OSLIST: None,
2685 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2688 if vg_name is not None:
2689 node_verify_param[constants.NV_VGLIST] = None
2690 node_verify_param[constants.NV_LVLIST] = vg_name
2691 node_verify_param[constants.NV_PVLIST] = [vg_name]
2692 node_verify_param[constants.NV_DRBDLIST] = None
2695 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2698 # FIXME: this needs to be changed per node-group, not cluster-wide
2700 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2701 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2702 bridges.add(default_nicpp[constants.NIC_LINK])
2703 for instance in self.my_inst_info.values():
2704 for nic in instance.nics:
2705 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2706 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2707 bridges.add(full_nic[constants.NIC_LINK])
2710 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2712 # Build our expected cluster state
2713 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2715 vm_capable=node.vm_capable))
2716 for node in node_data_list)
2720 for node in self.all_node_info.values():
2721 path = _SupportsOob(self.cfg, node)
2722 if path and path not in oob_paths:
2723 oob_paths.append(path)
2726 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2728 for instance in self.my_inst_names:
2729 inst_config = self.my_inst_info[instance]
2731 for nname in inst_config.all_nodes:
2732 if nname not in node_image:
2733 gnode = self.NodeImage(name=nname)
2734 gnode.ghost = (nname not in self.all_node_info)
2735 node_image[nname] = gnode
2737 inst_config.MapLVsByNode(node_vol_should)
2739 pnode = inst_config.primary_node
2740 node_image[pnode].pinst.append(instance)
2742 for snode in inst_config.secondary_nodes:
2743 nimg = node_image[snode]
2744 nimg.sinst.append(instance)
2745 if pnode not in nimg.sbp:
2746 nimg.sbp[pnode] = []
2747 nimg.sbp[pnode].append(instance)
2749 # At this point, we have the in-memory data structures complete,
2750 # except for the runtime information, which we'll gather next
2752 # Due to the way our RPC system works, exact response times cannot be
2753 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2754 # time before and after executing the request, we can at least have a time window.
2756 nvinfo_starttime = time.time()
2757 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2759 self.cfg.GetClusterName())
2760 nvinfo_endtime = time.time()
2762 if self.extra_lv_nodes and vg_name is not None:
2764 self.rpc.call_node_verify(self.extra_lv_nodes,
2765 {constants.NV_LVLIST: vg_name},
2766 self.cfg.GetClusterName())
2768 extra_lv_nvinfo = {}
2770 all_drbd_map = self.cfg.ComputeDRBDMap()
2772 feedback_fn("* Gathering disk information (%s nodes)" %
2773 len(self.my_node_names))
2774 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2777 feedback_fn("* Verifying configuration file consistency")
2779 # If not all nodes are being checked, we need to make sure the master node
2780 # and a non-checked vm_capable node are in the list.
2781 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2783 vf_nvinfo = all_nvinfo.copy()
2784 vf_node_info = list(self.my_node_info.values())
2785 additional_nodes = []
2786 if master_node not in self.my_node_info:
2787 additional_nodes.append(master_node)
2788 vf_node_info.append(self.all_node_info[master_node])
2789 # Add the first vm_capable node we find which is not included
2790 for node in absent_nodes:
2791 nodeinfo = self.all_node_info[node]
2792 if nodeinfo.vm_capable and not nodeinfo.offline:
2793 additional_nodes.append(node)
2794 vf_node_info.append(self.all_node_info[node])
2796 key = constants.NV_FILELIST
2797 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2798 {key: node_verify_param[key]},
2799 self.cfg.GetClusterName()))
2801 vf_nvinfo = all_nvinfo
2802 vf_node_info = self.my_node_info.values()
2804 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2806 feedback_fn("* Verifying node status")
2810 for node_i in node_data_list:
2812 nimg = node_image[node]
2816 feedback_fn("* Skipping offline node %s" % (node,))
2820 if node == master_node:
2822 elif node_i.master_candidate:
2823 ntype = "master candidate"
2824 elif node_i.drained:
2830 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2832 msg = all_nvinfo[node].fail_msg
2833 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2835 nimg.rpc_fail = True
2838 nresult = all_nvinfo[node].payload
2840 nimg.call_ok = self._VerifyNode(node_i, nresult)
2841 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2842 self._VerifyNodeNetwork(node_i, nresult)
2843 self._VerifyOob(node_i, nresult)
2846 self._VerifyNodeLVM(node_i, nresult, vg_name)
2847 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2850 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2851 self._UpdateNodeInstances(node_i, nresult, nimg)
2852 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2853 self._UpdateNodeOS(node_i, nresult, nimg)
2855 if not nimg.os_fail:
2856 if refos_img is None:
2858 self._VerifyNodeOS(node_i, nimg, refos_img)
2859 self._VerifyNodeBridges(node_i, nresult, bridges)
2861 # Check whether all running instances are primary for the node. (This
2862 # can no longer be done from _VerifyInstance below, since some of the
2863 # wrong instances could be from other node groups.)
2864 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2866 for inst in non_primary_inst:
2867 test = inst in self.all_inst_info
2868 _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2869 "instance should not run on node %s", node_i.name)
2870 _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2871 "node is running unknown instance %s", inst)
2873 for node, result in extra_lv_nvinfo.items():
2874 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2875 node_image[node], vg_name)
2877 feedback_fn("* Verifying instance status")
2878 for instance in self.my_inst_names:
2880 feedback_fn("* Verifying instance %s" % instance)
2881 inst_config = self.my_inst_info[instance]
2882 self._VerifyInstance(instance, inst_config, node_image,
2884 inst_nodes_offline = []
2886 pnode = inst_config.primary_node
2887 pnode_img = node_image[pnode]
2888 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2889 self.ENODERPC, pnode, "instance %s, connection to"
2890 " primary node failed", instance)
2892 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2893 self.EINSTANCEBADNODE, instance,
2894 "instance is marked as running and lives on offline node %s",
2895 inst_config.primary_node)
2897 # If the instance is non-redundant we cannot survive losing its primary
2898 # node, so we are not N+1 compliant. On the other hand we have no disk
2899 # templates with more than one secondary so that situation is not well
2901 # FIXME: does not support file-backed instances
2902 if not inst_config.secondary_nodes:
2903 i_non_redundant.append(instance)
2905 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2906 instance, "instance has multiple secondary nodes: %s",
2907 utils.CommaJoin(inst_config.secondary_nodes),
2908 code=self.ETYPE_WARNING)
2910 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2911 pnode = inst_config.primary_node
2912 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2913 instance_groups = {}
2915 for node in instance_nodes:
2916 instance_groups.setdefault(self.all_node_info[node].group,
2920 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2921 # Sort so that we always list the primary node first.
2922 for group, nodes in sorted(instance_groups.items(),
2923 key=lambda (_, nodes): pnode in nodes,
2926 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2927 instance, "instance has primary and secondary nodes in"
2928 " different groups: %s", utils.CommaJoin(pretty_list),
2929 code=self.ETYPE_WARNING)
2931 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2932 i_non_a_balanced.append(instance)
2934 for snode in inst_config.secondary_nodes:
2935 s_img = node_image[snode]
2936 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2937 "instance %s, connection to secondary node failed", instance)
2940 inst_nodes_offline.append(snode)
2942 # warn that the instance lives on offline nodes
2943 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2944 "instance has offline secondary node(s) %s",
2945 utils.CommaJoin(inst_nodes_offline))
2946 # ... or ghost/non-vm_capable nodes
2947 for node in inst_config.all_nodes:
2948 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2949 "instance lives on ghost node %s", node)
2950 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2951 instance, "instance lives on non-vm_capable node %s", node)
2953 feedback_fn("* Verifying orphan volumes")
2954 reserved = utils.FieldSet(*cluster.reserved_lvs)
2956 # We will get spurious "unknown volume" warnings if any node of this group
2957 # is secondary for an instance whose primary is in another group. To avoid
2958 # them, we find these instances and add their volumes to node_vol_should.
2959 for inst in self.all_inst_info.values():
2960 for secondary in inst.secondary_nodes:
2961 if (secondary in self.my_node_info
2962 and inst.name not in self.my_inst_info):
2963 inst.MapLVsByNode(node_vol_should)
2966 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2968 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2969 feedback_fn("* Verifying N+1 Memory redundancy")
2970 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2972 feedback_fn("* Other Notes")
2974 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2975 % len(i_non_redundant))
2977 if i_non_a_balanced:
2978 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2979 % len(i_non_a_balanced))
2982 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2985 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2989 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2990 """Analyze the post-hooks' result
2992 This method analyses the hook result, handles it, and sends some
2993 nicely-formatted feedback back to the user.
2995 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2996 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2997 @param hooks_results: the results of the multi-node hooks rpc call
2998 @param feedback_fn: function used to send feedback back to the caller
2999 @param lu_result: previous Exec result
3000 @return: the new Exec result, based on the previous result
3004 # We only really run POST phase hooks, only for non-empty groups,
3005 # and are only interested in their results
3006 if not self.my_node_names:
3009 elif phase == constants.HOOKS_PHASE_POST:
3010 # Used to change hooks' output to proper indentation
3011 feedback_fn("* Hooks Results")
3012 assert hooks_results, "invalid result from hooks"
3014 for node_name in hooks_results:
3015 res = hooks_results[node_name]
3017 test = msg and not res.offline
3018 self._ErrorIf(test, self.ENODEHOOKS, node_name,
3019 "Communication failure in hooks execution: %s", msg)
3020 if res.offline or msg:
3021 # No need to investigate payload if node is offline or gave an error
3024 for script, hkr, output in res.payload:
3025 test = hkr == constants.HKR_FAIL
3026 self._ErrorIf(test, self.ENODEHOOKS, node_name,
3027 "Script %s failed, output:", script)
3029 output = self._HOOKS_INDENT_RE.sub(" ", output)
3030 feedback_fn("%s" % output)
3036 class LUClusterVerifyDisks(NoHooksLU):
3037 """Verifies the status of all disks in the cluster.
3042 def ExpandNames(self):
3043 self.share_locks = _ShareAll()
3044 self.needed_locks = {
3045 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3048 def Exec(self, feedback_fn):
3049 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3051 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3052 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3053 for group in group_names])
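  # Illustrative sketch, not part of the LU (hypothetical group names): with
  # two node groups the Exec above returns the following job list, one job per
  # group:
  #
  #   [[opcodes.OpGroupVerifyDisks(group_name="default")],
  #    [opcodes.OpGroupVerifyDisks(group_name="storage")]]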
3056 class LUGroupVerifyDisks(NoHooksLU):
3057 """Verifies the status of all disks in a node group.
3062 def ExpandNames(self):
3063 # Raises errors.OpPrereqError on its own if group can't be found
3064 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3066 self.share_locks = _ShareAll()
3067 self.needed_locks = {
3068 locking.LEVEL_INSTANCE: [],
3069 locking.LEVEL_NODEGROUP: [],
3070 locking.LEVEL_NODE: [],
3073 def DeclareLocks(self, level):
3074 if level == locking.LEVEL_INSTANCE:
3075 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3077 # Lock instances optimistically, needs verification once node and group
3078 # locks have been acquired
3079 self.needed_locks[locking.LEVEL_INSTANCE] = \
3080 self.cfg.GetNodeGroupInstances(self.group_uuid)
3082 elif level == locking.LEVEL_NODEGROUP:
3083 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3085 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3086 set([self.group_uuid] +
3087 # Lock all groups used by instances optimistically; this requires
3088 # going via the node before it's locked, requiring verification
3091 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3092 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3094 elif level == locking.LEVEL_NODE:
3095 # This will only lock the nodes in the group to be verified which contain actual instances
3097 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3098 self._LockInstancesNodes()
3100 # Lock all nodes in group to be verified
3101 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3102 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3103 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3105 def CheckPrereq(self):
3106 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3107 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3108 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3110 assert self.group_uuid in owned_groups
3112 # Check if locked instances are still correct
3113 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3115 # Get instance information
3116 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3118 # Check if node groups for locked instances are still correct
3119 for (instance_name, inst) in self.instances.items():
3120 assert owned_nodes.issuperset(inst.all_nodes), \
3121 "Instance %s's nodes changed while we kept the lock" % instance_name
3123 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3126 assert self.group_uuid in inst_groups, \
3127 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3129 def Exec(self, feedback_fn):
3130 """Verify integrity of cluster disks.
3132 @rtype: tuple of three items
3133 @return: a tuple of (dict of node-to-node_error, list of instances
3134 which need activate-disks, dict of instance: (node, volume) for missing volumes
3139 res_instances = set()
3142 nv_dict = _MapInstanceDisksToNodes([inst
3143 for inst in self.instances.values()
3147 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3148 set(self.cfg.GetVmCapableNodeList()))
3150 node_lvs = self.rpc.call_lv_list(nodes, [])
3152 for (node, node_res) in node_lvs.items():
3153 if node_res.offline:
3156 msg = node_res.fail_msg
3158 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3159 res_nodes[node] = msg
3162 for lv_name, (_, _, lv_online) in node_res.payload.items():
3163 inst = nv_dict.pop((node, lv_name), None)
3164 if not (lv_online or inst is None):
3165 res_instances.add(inst)
3167 # any leftover items in nv_dict are missing LVs, let's arrange the data
3169 for key, inst in nv_dict.iteritems():
3170 res_missing.setdefault(inst, []).append(key)
3172 return (res_nodes, list(res_instances), res_missing)
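  # Illustrative sketch, not part of the LU (hypothetical names): a possible
  # return value of the Exec above, roughly matching the docstring:
  #
  #   ({"node3.example.com": "error while running lv_list: ..."},
  #    ["inst2.example.com"],                 # needs activate-disks
  #    {"inst5.example.com": [("node1.example.com", "xenvg/disk0")]})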
3175 class LUClusterRepairDiskSizes(NoHooksLU):
3176 """Verifies the cluster disks sizes.
3181 def ExpandNames(self):
3182 if self.op.instances:
3183 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3184 self.needed_locks = {
3185 locking.LEVEL_NODE: [],
3186 locking.LEVEL_INSTANCE: self.wanted_names,
3188 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3190 self.wanted_names = None
3191 self.needed_locks = {
3192 locking.LEVEL_NODE: locking.ALL_SET,
3193 locking.LEVEL_INSTANCE: locking.ALL_SET,
3195 self.share_locks = _ShareAll()
3197 def DeclareLocks(self, level):
3198 if level == locking.LEVEL_NODE and self.wanted_names is not None:
3199 self._LockInstancesNodes(primary_only=True)
3201 def CheckPrereq(self):
3202 """Check prerequisites.
3204 This only checks the optional instance list against the existing names.
3207 if self.wanted_names is None:
3208 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3210 self.wanted_instances = \
3211 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3213 def _EnsureChildSizes(self, disk):
3214 """Ensure children of the disk have the needed disk size.
3216 This is valid mainly for DRBD8 and fixes an issue where the
3217 children have smaller disk size.
3219 @param disk: an L{ganeti.objects.Disk} object
3222 if disk.dev_type == constants.LD_DRBD8:
3223 assert disk.children, "Empty children for DRBD8?"
3224 fchild = disk.children[0]
3225 mismatch = fchild.size < disk.size
3227 self.LogInfo("Child disk has size %d, parent %d, fixing",
3228 fchild.size, disk.size)
3229 fchild.size = disk.size
3231 # and we recurse on this child only, not on the metadev
3232 return self._EnsureChildSizes(fchild) or mismatch
3236 def Exec(self, feedback_fn):
3237 """Verify the size of cluster disks.
3240 # TODO: check child disks too
3241 # TODO: check differences in size between primary/secondary nodes
3243 for instance in self.wanted_instances:
3244 pnode = instance.primary_node
3245 if pnode not in per_node_disks:
3246 per_node_disks[pnode] = []
3247 for idx, disk in enumerate(instance.disks):
3248 per_node_disks[pnode].append((instance, idx, disk))
3251 for node, dskl in per_node_disks.items():
3252 newl = [v[2].Copy() for v in dskl]
3254 self.cfg.SetDiskID(dsk, node)
3255 result = self.rpc.call_blockdev_getsize(node, newl)
3257 self.LogWarning("Failure in blockdev_getsize call to node"
3258 " %s, ignoring", node)
3260 if len(result.payload) != len(dskl):
3261 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3262 " result.payload=%s", node, len(dskl), result.payload)
3263 self.LogWarning("Invalid result from node %s, ignoring node results",
3266 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3268 self.LogWarning("Disk %d of instance %s did not return size"
3269 " information, ignoring", idx, instance.name)
3271 if not isinstance(size, (int, long)):
3272 self.LogWarning("Disk %d of instance %s did not return valid"
3273 " size information, ignoring", idx, instance.name)
3276 if size != disk.size:
3277 self.LogInfo("Disk %d of instance %s has mismatched size,"
3278 " correcting: recorded %d, actual %d", idx,
3279 instance.name, disk.size, size)
3281 self.cfg.Update(instance, feedback_fn)
3282 changed.append((instance.name, idx, size))
3283 if self._EnsureChildSizes(disk):
3284 self.cfg.Update(instance, feedback_fn)
3285 changed.append((instance.name, idx, disk.size))
3289 class LUClusterRename(LogicalUnit):
3290 """Rename the cluster.
3293 HPATH = "cluster-rename"
3294 HTYPE = constants.HTYPE_CLUSTER
3296 def BuildHooksEnv(self):
3301 "OP_TARGET": self.cfg.GetClusterName(),
3302 "NEW_NAME": self.op.name,
3305 def BuildHooksNodes(self):
3306 """Build hooks nodes.
3309 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3311 def CheckPrereq(self):
3312 """Verify that the passed name is a valid one.
3315 hostname = netutils.GetHostname(name=self.op.name,
3316 family=self.cfg.GetPrimaryIPFamily())
3318 new_name = hostname.name
3319 self.ip = new_ip = hostname.ip
3320 old_name = self.cfg.GetClusterName()
3321 old_ip = self.cfg.GetMasterIP()
3322 if new_name == old_name and new_ip == old_ip:
3323 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3324 " cluster has changed",
3326 if new_ip != old_ip:
3327 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3328 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3329 " reachable on the network" %
3330 new_ip, errors.ECODE_NOTUNIQUE)
3332 self.op.name = new_name
3334 def Exec(self, feedback_fn):
3335 """Rename the cluster.
3338 clustername = self.op.name
3341 # shutdown the master IP
3342 master = self.cfg.GetMasterNode()
3343 result = self.rpc.call_node_deactivate_master_ip(master)
3344 result.Raise("Could not disable the master role")
3347 cluster = self.cfg.GetClusterInfo()
3348 cluster.cluster_name = clustername
3349 cluster.master_ip = ip
3350 self.cfg.Update(cluster, feedback_fn)
3352 # update the known hosts file
3353 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3354 node_list = self.cfg.GetOnlineNodeList()
3356 node_list.remove(master)
3359 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3361 result = self.rpc.call_node_activate_master_ip(master)
3362 msg = result.fail_msg
3364 self.LogWarning("Could not re-enable the master role on"
3365 " the master, please restart manually: %s", msg)
3370 class LUClusterSetParams(LogicalUnit):
3371 """Change the parameters of the cluster.
3374 HPATH = "cluster-modify"
3375 HTYPE = constants.HTYPE_CLUSTER
3378 def CheckArguments(self):
3382 if self.op.uid_pool:
3383 uidpool.CheckUidPool(self.op.uid_pool)
3385 if self.op.add_uids:
3386 uidpool.CheckUidPool(self.op.add_uids)
3388 if self.op.remove_uids:
3389 uidpool.CheckUidPool(self.op.remove_uids)
3391 def ExpandNames(self):
3392 # FIXME: in the future maybe other cluster params won't require checking on
3393 # all nodes to be modified.
3394 self.needed_locks = {
3395 locking.LEVEL_NODE: locking.ALL_SET,
3397 self.share_locks[locking.LEVEL_NODE] = 1
3399 def BuildHooksEnv(self):
3404 "OP_TARGET": self.cfg.GetClusterName(),
3405 "NEW_VG_NAME": self.op.vg_name,
3408 def BuildHooksNodes(self):
3409 """Build hooks nodes.
3412 mn = self.cfg.GetMasterNode()
3415 def CheckPrereq(self):
3416 """Check prerequisites.
3418 This checks that the given params don't conflict and that
3419 the given volume group is valid.
3422 if self.op.vg_name is not None and not self.op.vg_name:
3423 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3424 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3425 " instances exist", errors.ECODE_INVAL)
3427 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3428 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3429 raise errors.OpPrereqError("Cannot disable drbd helper while"
3430 " drbd-based instances exist",
3433 node_list = self.owned_locks(locking.LEVEL_NODE)
3435 # if vg_name not None, checks given volume group on all nodes
3437 vglist = self.rpc.call_vg_list(node_list)
3438 for node in node_list:
3439 msg = vglist[node].fail_msg
3441 # ignoring down node
3442 self.LogWarning("Error while gathering data on node %s"
3443 " (ignoring node): %s", node, msg)
3445 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3447 constants.MIN_VG_SIZE)
3449 raise errors.OpPrereqError("Error on node '%s': %s" %
3450 (node, vgstatus), errors.ECODE_ENVIRON)
3452 if self.op.drbd_helper:
3453 # checks given drbd helper on all nodes
3454 helpers = self.rpc.call_drbd_helper(node_list)
3455 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3457 self.LogInfo("Not checking drbd helper on offline node %s", node)
3459 msg = helpers[node].fail_msg
3461 raise errors.OpPrereqError("Error checking drbd helper on node"
3462 " '%s': %s" % (node, msg),
3463 errors.ECODE_ENVIRON)
3464 node_helper = helpers[node].payload
3465 if node_helper != self.op.drbd_helper:
3466 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3467 (node, node_helper), errors.ECODE_ENVIRON)
3469 self.cluster = cluster = self.cfg.GetClusterInfo()
3470 # validate params changes
3471 if self.op.beparams:
3472 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3473 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3475 if self.op.ndparams:
3476 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3477 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3479 # TODO: we need a more general way to handle resetting
3480 # cluster-level parameters to default values
3481 if self.new_ndparams["oob_program"] == "":
3482 self.new_ndparams["oob_program"] = \
3483 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3485 if self.op.nicparams:
3486 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3487 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3488 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3491 # check all instances for consistency
3492 for instance in self.cfg.GetAllInstancesInfo().values():
3493 for nic_idx, nic in enumerate(instance.nics):
3494 params_copy = copy.deepcopy(nic.nicparams)
3495 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3497 # check parameter syntax
3499 objects.NIC.CheckParameterSyntax(params_filled)
3500 except errors.ConfigurationError, err:
3501 nic_errors.append("Instance %s, nic/%d: %s" %
3502 (instance.name, nic_idx, err))
3504 # if we're moving instances to routed, check that they have an ip
3505 target_mode = params_filled[constants.NIC_MODE]
3506 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3507 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3508 " address" % (instance.name, nic_idx))
3510 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3511 "\n".join(nic_errors))
3513 # hypervisor list/parameters
3514 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3515 if self.op.hvparams:
3516 for hv_name, hv_dict in self.op.hvparams.items():
3517 if hv_name not in self.new_hvparams:
3518 self.new_hvparams[hv_name] = hv_dict
3520 self.new_hvparams[hv_name].update(hv_dict)
3522 # os hypervisor parameters
3523 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3525 for os_name, hvs in self.op.os_hvp.items():
3526 if os_name not in self.new_os_hvp:
3527 self.new_os_hvp[os_name] = hvs
3529 for hv_name, hv_dict in hvs.items():
3530 if hv_name not in self.new_os_hvp[os_name]:
3531 self.new_os_hvp[os_name][hv_name] = hv_dict
3533 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3536 self.new_osp = objects.FillDict(cluster.osparams, {})
3537 if self.op.osparams:
3538 for os_name, osp in self.op.osparams.items():
3539 if os_name not in self.new_osp:
3540 self.new_osp[os_name] = {}
3542 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3545 if not self.new_osp[os_name]:
3546 # we removed all parameters
3547 del self.new_osp[os_name]
3549 # check the parameter validity (remote check)
3550 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3551 os_name, self.new_osp[os_name])
3553 # changes to the hypervisor list
3554 if self.op.enabled_hypervisors is not None:
3555 self.hv_list = self.op.enabled_hypervisors
3556 for hv in self.hv_list:
3557 # if the hypervisor doesn't already exist in the cluster
3558 # hvparams, we initialize it to empty, and then (in both
3559 # cases) we make sure to fill the defaults, as we might not
3560 # have a complete defaults list if the hypervisor wasn't
3562 if hv not in new_hvp:
3564 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3565 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3567 self.hv_list = cluster.enabled_hypervisors
3569 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3570 # either the enabled list has changed, or the parameters have, validate
3571 for hv_name, hv_params in self.new_hvparams.items():
3572 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3573 (self.op.enabled_hypervisors and
3574 hv_name in self.op.enabled_hypervisors)):
3575 # either this is a new hypervisor, or its parameters have changed
3576 hv_class = hypervisor.GetHypervisor(hv_name)
3577 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3578 hv_class.CheckParameterSyntax(hv_params)
3579 _CheckHVParams(self, node_list, hv_name, hv_params)
3582 # no need to check any newly-enabled hypervisors, since the
3583 # defaults have already been checked in the above code-block
3584 for os_name, os_hvp in self.new_os_hvp.items():
3585 for hv_name, hv_params in os_hvp.items():
3586 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3587 # we need to fill in the new os_hvp on top of the actual hv_p
3588 cluster_defaults = self.new_hvparams.get(hv_name, {})
3589 new_osp = objects.FillDict(cluster_defaults, hv_params)
3590 hv_class = hypervisor.GetHypervisor(hv_name)
3591 hv_class.CheckParameterSyntax(new_osp)
3592 _CheckHVParams(self, node_list, hv_name, new_osp)
3594 if self.op.default_iallocator:
3595 alloc_script = utils.FindFile(self.op.default_iallocator,
3596 constants.IALLOCATOR_SEARCH_PATH,
3598 if alloc_script is None:
3599 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3600 " specified" % self.op.default_iallocator,
3603 def Exec(self, feedback_fn):
3604 """Change the parameters of the cluster.
3607 if self.op.vg_name is not None:
3608 new_volume = self.op.vg_name
3611 if new_volume != self.cfg.GetVGName():
3612 self.cfg.SetVGName(new_volume)
3614 feedback_fn("Cluster LVM configuration already in desired"
3615 " state, not changing")
3616 if self.op.drbd_helper is not None:
3617 new_helper = self.op.drbd_helper
3620 if new_helper != self.cfg.GetDRBDHelper():
3621 self.cfg.SetDRBDHelper(new_helper)
3623 feedback_fn("Cluster DRBD helper already in desired state,"
3625 if self.op.hvparams:
3626 self.cluster.hvparams = self.new_hvparams
3628 self.cluster.os_hvp = self.new_os_hvp
3629 if self.op.enabled_hypervisors is not None:
3630 self.cluster.hvparams = self.new_hvparams
3631 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3632 if self.op.beparams:
3633 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3634 if self.op.nicparams:
3635 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3636 if self.op.osparams:
3637 self.cluster.osparams = self.new_osp
3638 if self.op.ndparams:
3639 self.cluster.ndparams = self.new_ndparams
3641 if self.op.candidate_pool_size is not None:
3642 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3643 # we need to update the pool size here, otherwise the save will fail
3644 _AdjustCandidatePool(self, [])
3646 if self.op.maintain_node_health is not None:
3647 self.cluster.maintain_node_health = self.op.maintain_node_health
3649 if self.op.prealloc_wipe_disks is not None:
3650 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3652 if self.op.add_uids is not None:
3653 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3655 if self.op.remove_uids is not None:
3656 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3658 if self.op.uid_pool is not None:
3659 self.cluster.uid_pool = self.op.uid_pool
3661 if self.op.default_iallocator is not None:
3662 self.cluster.default_iallocator = self.op.default_iallocator
3664 if self.op.reserved_lvs is not None:
3665 self.cluster.reserved_lvs = self.op.reserved_lvs
3667 def helper_os(aname, mods, desc):
3669 lst = getattr(self.cluster, aname)
3670 for key, val in mods:
3671 if key == constants.DDM_ADD:
3673 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3676 elif key == constants.DDM_REMOVE:
3680 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3682 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3684 if self.op.hidden_os:
3685 helper_os("hidden_os", self.op.hidden_os, "hidden")
3687 if self.op.blacklisted_os:
3688 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
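    # Illustrative sketch, not part of the LU (hypothetical OS names): the
    # hidden_os/blacklisted_os opcode fields are lists of (modification, OS
    # name) pairs consumed by helper_os above, e.g.
    #
    #   op.hidden_os = [(constants.DDM_ADD, "debootstrap+secret"),
    #                   (constants.DDM_REMOVE, "old-os")]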
3690 if self.op.master_netdev:
3691 master = self.cfg.GetMasterNode()
3692 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3693 self.cluster.master_netdev)
3694 result = self.rpc.call_node_deactivate_master_ip(master)
3695 result.Raise("Could not disable the master ip")
3696 feedback_fn("Changing master_netdev from %s to %s" %
3697 (self.cluster.master_netdev, self.op.master_netdev))
3698 self.cluster.master_netdev = self.op.master_netdev
3700 self.cfg.Update(self.cluster, feedback_fn)
3702 if self.op.master_netdev:
3703 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3704 self.op.master_netdev)
3705 result = self.rpc.call_node_activate_master_ip(master)
3707 self.LogWarning("Could not re-enable the master ip on"
3708 " the master, please restart manually: %s",
3712 def _UploadHelper(lu, nodes, fname):
3713 """Helper for uploading a file and showing warnings.
3716 if os.path.exists(fname):
3717 result = lu.rpc.call_upload_file(nodes, fname)
3718 for to_node, to_result in result.items():
3719 msg = to_result.fail_msg
3721 msg = ("Copy of file %s to node %s failed: %s" %
3722 (fname, to_node, msg))
3723 lu.proc.LogWarning(msg)
3726 def _ComputeAncillaryFiles(cluster, redist):
3727 """Compute files external to Ganeti which need to be consistent.
3729 @type redist: boolean
3730 @param redist: Whether to include files which need to be redistributed
3733 # Compute files for all nodes
3735 constants.SSH_KNOWN_HOSTS_FILE,
3736 constants.CONFD_HMAC_KEY,
3737 constants.CLUSTER_DOMAIN_SECRET_FILE,
3741 files_all.update(constants.ALL_CERT_FILES)
3742 files_all.update(ssconf.SimpleStore().GetFileList())
3744 # we need to ship at least the RAPI certificate
3745 files_all.add(constants.RAPI_CERT_FILE)
3747 if cluster.modify_etc_hosts:
3748 files_all.add(constants.ETC_HOSTS)
3750 # Files which must either exist on all nodes or on none
3751 files_all_opt = set([
3752 constants.RAPI_USERS_FILE,
3755 # Files which should only be on master candidates
3758 files_mc.add(constants.CLUSTER_CONF_FILE)
3760 # Files which should only be on VM-capable nodes
3761 files_vm = set(filename
3762 for hv_name in cluster.enabled_hypervisors
3763 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3765 # Filenames must be unique
3766 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3767 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3768 "Found file listed in more than one file list"
3770 return (files_all, files_all_opt, files_mc, files_vm)
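# For illustration, the tuple returned above groups plain file paths by the
# scope they must be distributed to, roughly:
#   files_all     -> every node (e.g. known_hosts, HMAC key, certificates)
#   files_all_opt -> all nodes or none (e.g. the RAPI users file)
#   files_mc      -> master candidates only (e.g. the cluster config file)
#   files_vm      -> VM-capable nodes only (hypervisor ancillary files)
# _RedistributeAncillaryFiles below consumes exactly this shape.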
3773 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3774 """Distribute additional files which are part of the cluster configuration.
3776 ConfigWriter takes care of distributing the config and ssconf files, but
3777 there are more files which should be distributed to all nodes. This function
3778 makes sure those are copied.
3780 @param lu: calling logical unit
3781 @param additional_nodes: list of nodes not in the config to distribute to
3782 @type additional_vm: boolean
3783 @param additional_vm: whether the additional nodes are vm-capable or not
3786 # Gather target nodes
3787 cluster = lu.cfg.GetClusterInfo()
3788 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3790 online_nodes = lu.cfg.GetOnlineNodeList()
3791 vm_nodes = lu.cfg.GetVmCapableNodeList()
3793 if additional_nodes is not None:
3794 online_nodes.extend(additional_nodes)
3796 vm_nodes.extend(additional_nodes)
3798 # Never distribute to master node
3799 for nodelist in [online_nodes, vm_nodes]:
3800 if master_info.name in nodelist:
3801 nodelist.remove(master_info.name)
3804 (files_all, files_all_opt, files_mc, files_vm) = \
3805 _ComputeAncillaryFiles(cluster, True)
3807 # Never re-distribute configuration file from here
3808 assert not (constants.CLUSTER_CONF_FILE in files_all or
3809 constants.CLUSTER_CONF_FILE in files_vm)
3810 assert not files_mc, "Master candidates not handled in this function"
3813 (online_nodes, files_all),
3814 (online_nodes, files_all_opt),
3815 (vm_nodes, files_vm),
3819 for (node_list, files) in filemap:
3821 _UploadHelper(lu, node_list, fname)
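# Note: the filemap built above pairs each target node list with the file set
# it should receive; master candidate files are deliberately absent here (see
# the assert), since distributing config.data is the job of ConfigWriter.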
3824 class LUClusterRedistConf(NoHooksLU):
3825 """Force the redistribution of cluster configuration.
3827 This is a very simple LU.
3832 def ExpandNames(self):
3833 self.needed_locks = {
3834 locking.LEVEL_NODE: locking.ALL_SET,
3836 self.share_locks[locking.LEVEL_NODE] = 1
3838 def Exec(self, feedback_fn):
3839 """Redistribute the configuration.
3842 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3843 _RedistributeAncillaryFiles(self)
3846 class LUClusterActivateMasterIp(NoHooksLU):
3847 """Activate the master IP on the master node.
3850 def Exec(self, feedback_fn):
3851 """Activate the master IP.
3854 master = self.cfg.GetMasterNode()
3855 self.rpc.call_node_activate_master_ip(master)
3858 class LUClusterDeactivateMasterIp(NoHooksLU):
3859 """Deactivate the master IP on the master node.
3862 def Exec(self, feedback_fn):
3863 """Deactivate the master IP.
3866 master = self.cfg.GetMasterNode()
3867 self.rpc.call_node_deactivate_master_ip(master)
3870 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3871 """Sleep and poll for an instance's disk to sync.
3874 if not instance.disks or disks is not None and not disks:
3877 disks = _ExpandCheckDisks(instance, disks)
3880 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3882 node = instance.primary_node
3885 lu.cfg.SetDiskID(dev, node)
3887 # TODO: Convert to utils.Retry
3890 degr_retries = 10 # in seconds, as we sleep 1 second each time
3894 cumul_degraded = False
3895 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3896 msg = rstats.fail_msg
3898 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3901 raise errors.RemoteError("Can't contact node %s for mirror data,"
3902 " aborting." % node)
3905 rstats = rstats.payload
3907 for i, mstat in enumerate(rstats):
3909 lu.LogWarning("Can't compute data for node %s/%s",
3910 node, disks[i].iv_name)
3913 cumul_degraded = (cumul_degraded or
3914 (mstat.is_degraded and mstat.sync_percent is None))
3915 if mstat.sync_percent is not None:
3917 if mstat.estimated_time is not None:
3918 rem_time = ("%s remaining (estimated)" %
3919 utils.FormatSeconds(mstat.estimated_time))
3920 max_time = mstat.estimated_time
3922 rem_time = "no time estimate"
3923 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3924 (disks[i].iv_name, mstat.sync_percent, rem_time))
3926 # if we're done but degraded, let's do a few small retries, to
3927 # make sure we see a stable and not transient situation; therefore
3928 # we force restart of the loop
3929 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3930 logging.info("Degraded disks found, %d retries left", degr_retries)
3938 time.sleep(min(60, max_time))
3941 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3942 return not cumul_degraded
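# Typical caller pattern (illustrative sketch only):
#
#   disk_abort = not _WaitForSync(self, instance)
#   if disk_abort:
#     raise errors.OpExecError("Disks of instance %s never synced" %
#                              instance.name)
#
# With oneshot=True the function performs a single status pass (plus a few
# short retries while the disks look degraded) instead of blocking until the
# sync has finished.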
3945 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3946 """Check that mirrors are not degraded.
3948 The ldisk parameter, if True, will change the test from the
3949 is_degraded attribute (which represents overall non-ok status for
3950 the device(s)) to the ldisk (representing the local storage status).
3953 lu.cfg.SetDiskID(dev, node)
3957 if on_primary or dev.AssembleOnSecondary():
3958 rstats = lu.rpc.call_blockdev_find(node, dev)
3959 msg = rstats.fail_msg
3961 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3963 elif not rstats.payload:
3964 lu.LogWarning("Can't find disk on node %s", node)
3968 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3970 result = result and not rstats.payload.is_degraded
3973 for child in dev.children:
3974 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
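# Note on the recursion above: for stacked devices (e.g. DRBD over LVM) the
# consistency of every child device is folded into the result as well, so
# callers only need to pass the top-level disk object.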
3979 class LUOobCommand(NoHooksLU):
3980 """Logical unit for OOB handling.
3984 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3986 def ExpandNames(self):
3987 """Gather locks we need.
3990 if self.op.node_names:
3991 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3992 lock_names = self.op.node_names
3994 lock_names = locking.ALL_SET
3996 self.needed_locks = {
3997 locking.LEVEL_NODE: lock_names,
4000 def CheckPrereq(self):
4001 """Check prerequisites.
4004 - the node exists in the configuration
4007 Any errors are signaled by raising errors.OpPrereqError.
4011 self.master_node = self.cfg.GetMasterNode()
4013 assert self.op.power_delay >= 0.0
4015 if self.op.node_names:
4016 if (self.op.command in self._SKIP_MASTER and
4017 self.master_node in self.op.node_names):
4018 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4019 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4021 if master_oob_handler:
4022 additional_text = ("run '%s %s %s' if you want to operate on the"
4023 " master regardless") % (master_oob_handler,
4027 additional_text = "it does not support out-of-band operations"
4029 raise errors.OpPrereqError(("Operating on the master node %s is not"
4030 " allowed for %s; %s") %
4031 (self.master_node, self.op.command,
4032 additional_text), errors.ECODE_INVAL)
4034 self.op.node_names = self.cfg.GetNodeList()
4035 if self.op.command in self._SKIP_MASTER:
4036 self.op.node_names.remove(self.master_node)
4038 if self.op.command in self._SKIP_MASTER:
4039 assert self.master_node not in self.op.node_names
4041 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4043 raise errors.OpPrereqError("Node %s not found" % node_name,
4046 self.nodes.append(node)
4048 if (not self.op.ignore_status and
4049 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4050 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4051 " not marked offline") % node_name,
4054 def Exec(self, feedback_fn):
4055 """Execute OOB and return result if we expect any.
4058 master_node = self.master_node
4061 for idx, node in enumerate(utils.NiceSort(self.nodes,
4062 key=lambda node: node.name)):
4063 node_entry = [(constants.RS_NORMAL, node.name)]
4064 ret.append(node_entry)
4066 oob_program = _SupportsOob(self.cfg, node)
4069 node_entry.append((constants.RS_UNAVAIL, None))
4072 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4073 self.op.command, oob_program, node.name)
4074 result = self.rpc.call_run_oob(master_node, oob_program,
4075 self.op.command, node.name,
4079 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4080 node.name, result.fail_msg)
4081 node_entry.append((constants.RS_NODATA, None))
4084 self._CheckPayload(result)
4085 except errors.OpExecError, err:
4086 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4088 node_entry.append((constants.RS_NODATA, None))
4090 if self.op.command == constants.OOB_HEALTH:
4091 # For health we should log important events
4092 for item, status in result.payload:
4093 if status in [constants.OOB_STATUS_WARNING,
4094 constants.OOB_STATUS_CRITICAL]:
4095 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4096 item, node.name, status)
4098 if self.op.command == constants.OOB_POWER_ON:
4100 elif self.op.command == constants.OOB_POWER_OFF:
4101 node.powered = False
4102 elif self.op.command == constants.OOB_POWER_STATUS:
4103 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4104 if powered != node.powered:
4105 logging.warning(("Recorded power state (%s) of node '%s' does not"
4106 " match actual power state (%s)"), node.powered,
4109 # For configuration changing commands we should update the node
4110 if self.op.command in (constants.OOB_POWER_ON,
4111 constants.OOB_POWER_OFF):
4112 self.cfg.Update(node, feedback_fn)
4114 node_entry.append((constants.RS_NORMAL, result.payload))
4116 if (self.op.command == constants.OOB_POWER_ON and
4117 idx < len(self.nodes) - 1):
4118 time.sleep(self.op.power_delay)
4122 def _CheckPayload(self, result):
4123 """Checks if the payload is valid.
4125 @param result: RPC result
4126 @raises errors.OpExecError: If payload is not valid
4130 if self.op.command == constants.OOB_HEALTH:
4131 if not isinstance(result.payload, list):
4132 errs.append("command 'health' is expected to return a list but got %s" %
4133 type(result.payload))
4135 for item, status in result.payload:
4136 if status not in constants.OOB_STATUSES:
4137 errs.append("health item '%s' has invalid status '%s'" %
4140 if self.op.command == constants.OOB_POWER_STATUS:
4141 if not isinstance(result.payload, dict):
4142 errs.append("power-status is expected to return a dict but got %s" %
4143 type(result.payload))
4145 if self.op.command in [
4146 constants.OOB_POWER_ON,
4147 constants.OOB_POWER_OFF,
4148 constants.OOB_POWER_CYCLE,
4150 if result.payload is not None:
4151 errs.append("%s is expected to not return payload but got '%s'" %
4152 (self.op.command, result.payload))
4155 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4156 utils.CommaJoin(errs))
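# For reference, the payload shapes accepted above are roughly:
#   OOB_HEALTH              -> list of (item, status) pairs,
#                              with status in constants.OOB_STATUSES
#   OOB_POWER_STATUS        -> dict, e.g. {OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON/OFF/CYCLE  -> no payload at all (None)
# This is a summary of the checks in _CheckPayload, not a full specification.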
4159 class _OsQuery(_QueryBase):
4160 FIELDS = query.OS_FIELDS
4162 def ExpandNames(self, lu):
4163 # Lock all nodes in shared mode
4164 # Temporary removal of locks, should be reverted later
4165 # TODO: reintroduce locks when they are lighter-weight
4166 lu.needed_locks = {}
4167 #self.share_locks[locking.LEVEL_NODE] = 1
4168 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4170 # The following variables interact with _QueryBase._GetNames
4172 self.wanted = self.names
4174 self.wanted = locking.ALL_SET
4176 self.do_locking = self.use_locking
4178 def DeclareLocks(self, lu, level):
4182 def _DiagnoseByOS(rlist):
4183 """Remaps a per-node return list into an a per-os per-node dictionary
4185 @param rlist: a map with node names as keys and OS objects as values
4188 @return: a dictionary with osnames as keys and as value another
4189 map, with nodes as keys and tuples of (path, status, diagnose,
4190 variants, parameters, api_versions) as values, eg::
4192 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4193 (/srv/..., False, "invalid api")],
4194 "node2": [(/srv/..., True, "", [], [])]}
4199 # here we build the list of nodes that didn't fail the RPC (at RPC
4200 # level), so that nodes with a non-responding node daemon don't
4201 # make all OSes invalid
4202 good_nodes = [node_name for node_name in rlist
4203 if not rlist[node_name].fail_msg]
4204 for node_name, nr in rlist.items():
4205 if nr.fail_msg or not nr.payload:
4207 for (name, path, status, diagnose, variants,
4208 params, api_versions) in nr.payload:
4209 if name not in all_os:
4210 # build a list of nodes for this os containing empty lists
4211 # for each node in node_list
4213 for nname in good_nodes:
4214 all_os[name][nname] = []
4215 # convert params from [name, help] to (name, help)
4216 params = [tuple(v) for v in params]
4217 all_os[name][node_name].append((path, status, diagnose,
4218 variants, params, api_versions))
4221 def _GetQueryData(self, lu):
4222 """Computes the list of nodes and their attributes.
4225 # Locking is not used
4226 assert not (compat.any(lu.glm.is_owned(level)
4227 for level in locking.LEVELS
4228 if level != locking.LEVEL_CLUSTER) or
4229 self.do_locking or self.use_locking)
4231 valid_nodes = [node.name
4232 for node in lu.cfg.GetAllNodesInfo().values()
4233 if not node.offline and node.vm_capable]
4234 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4235 cluster = lu.cfg.GetClusterInfo()
4239 for (os_name, os_data) in pol.items():
4240 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4241 hidden=(os_name in cluster.hidden_os),
4242 blacklisted=(os_name in cluster.blacklisted_os))
4246 api_versions = set()
4248 for idx, osl in enumerate(os_data.values()):
4249 info.valid = bool(info.valid and osl and osl[0][1])
4253 (node_variants, node_params, node_api) = osl[0][3:6]
4256 variants.update(node_variants)
4257 parameters.update(node_params)
4258 api_versions.update(node_api)
4260 # Filter out inconsistent values
4261 variants.intersection_update(node_variants)
4262 parameters.intersection_update(node_params)
4263 api_versions.intersection_update(node_api)
4265 info.variants = list(variants)
4266 info.parameters = list(parameters)
4267 info.api_versions = list(api_versions)
4269 data[os_name] = info
4271 # Prepare data in requested order
4272 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4276 class LUOsDiagnose(NoHooksLU):
4277 """Logical unit for OS diagnose/query.
4283 def _BuildFilter(fields, names):
4284 """Builds a filter for querying OSes.
4287 name_filter = qlang.MakeSimpleFilter("name", names)
4289 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4290 # respective field is not requested
4291 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4292 for fname in ["hidden", "blacklisted"]
4293 if fname not in fields]
4294 if "valid" not in fields:
4295 status_filter.append([qlang.OP_TRUE, "valid"])
4298 status_filter.insert(0, qlang.OP_AND)
4300 status_filter = None
4302 if name_filter and status_filter:
4303 return [qlang.OP_AND, name_filter, status_filter]
4307 return status_filter
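# Sketch of the filter produced above (assuming the usual qlang operator
# encodings, i.e. OP_AND == "&", OP_OR == "|", OP_NOT == "!", OP_TRUE == "?",
# OP_EQUAL == "="):
#
#   _BuildFilter(["name", "variants"], ["lenny-image"]) would yield
#   ["&", ["|", ["=", "name", "lenny-image"]],
#         ["&", ["!", ["?", "hidden"]],
#               ["!", ["?", "blacklisted"]],
#               ["?", "valid"]]]
#
# i.e. "name matches AND not hidden AND not blacklisted AND valid".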
4309 def CheckArguments(self):
4310 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4311 self.op.output_fields, False)
4313 def ExpandNames(self):
4314 self.oq.ExpandNames(self)
4316 def Exec(self, feedback_fn):
4317 return self.oq.OldStyleQuery(self)
4320 class LUNodeRemove(LogicalUnit):
4321 """Logical unit for removing a node.
4324 HPATH = "node-remove"
4325 HTYPE = constants.HTYPE_NODE
4327 def BuildHooksEnv(self):
4330 This doesn't run on the target node in the pre phase as a failed
4331 node would then be impossible to remove.
4335 "OP_TARGET": self.op.node_name,
4336 "NODE_NAME": self.op.node_name,
4339 def BuildHooksNodes(self):
4340 """Build hooks nodes.
4343 all_nodes = self.cfg.GetNodeList()
4345 all_nodes.remove(self.op.node_name)
4347 logging.warning("Node '%s', which is about to be removed, was not found"
4348 " in the list of all nodes", self.op.node_name)
4349 return (all_nodes, all_nodes)
4351 def CheckPrereq(self):
4352 """Check prerequisites.
4355 - the node exists in the configuration
4356 - it does not have primary or secondary instances
4357 - it's not the master
4359 Any errors are signaled by raising errors.OpPrereqError.
4362 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4363 node = self.cfg.GetNodeInfo(self.op.node_name)
4364 assert node is not None
4366 masternode = self.cfg.GetMasterNode()
4367 if node.name == masternode:
4368 raise errors.OpPrereqError("Node is the master node, failover to another"
4369 " node is required", errors.ECODE_INVAL)
4371 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4372 if node.name in instance.all_nodes:
4373 raise errors.OpPrereqError("Instance %s is still running on the node,"
4374 " please remove first" % instance_name,
4376 self.op.node_name = node.name
4379 def Exec(self, feedback_fn):
4380 """Removes the node from the cluster.
4384 logging.info("Stopping the node daemon and removing configs from node %s",
4387 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4389 # Promote nodes to master candidate as needed
4390 _AdjustCandidatePool(self, exceptions=[node.name])
4391 self.context.RemoveNode(node.name)
4393 # Run post hooks on the node before it's removed
4394 _RunPostHook(self, node.name)
4396 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4397 msg = result.fail_msg
4399 self.LogWarning("Errors encountered on the remote node while leaving"
4400 " the cluster: %s", msg)
4402 # Remove node from our /etc/hosts
4403 if self.cfg.GetClusterInfo().modify_etc_hosts:
4404 master_node = self.cfg.GetMasterNode()
4405 result = self.rpc.call_etc_hosts_modify(master_node,
4406 constants.ETC_HOSTS_REMOVE,
4408 result.Raise("Can't update hosts file with new host data")
4409 _RedistributeAncillaryFiles(self)
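# Ordering note for the removal above: the candidate pool is adjusted and the
# node dropped from the cluster context before any RPC is made, so a node
# that cannot clean itself up (call_node_leave_cluster failing) only results
# in a warning instead of aborting the removal.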
4412 class _NodeQuery(_QueryBase):
4413 FIELDS = query.NODE_FIELDS
4415 def ExpandNames(self, lu):
4416 lu.needed_locks = {}
4417 lu.share_locks = _ShareAll()
4420 self.wanted = _GetWantedNodes(lu, self.names)
4422 self.wanted = locking.ALL_SET
4424 self.do_locking = (self.use_locking and
4425 query.NQ_LIVE in self.requested_data)
4428 # If any non-static field is requested we need to lock the nodes
4429 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4431 def DeclareLocks(self, lu, level):
4434 def _GetQueryData(self, lu):
4435 """Computes the list of nodes and their attributes.
4438 all_info = lu.cfg.GetAllNodesInfo()
4440 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4442 # Gather data as requested
4443 if query.NQ_LIVE in self.requested_data:
4444 # filter out non-vm_capable nodes
4445 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4447 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4448 lu.cfg.GetHypervisorType())
4449 live_data = dict((name, nresult.payload)
4450 for (name, nresult) in node_data.items()
4451 if not nresult.fail_msg and nresult.payload)
4455 if query.NQ_INST in self.requested_data:
4456 node_to_primary = dict([(name, set()) for name in nodenames])
4457 node_to_secondary = dict([(name, set()) for name in nodenames])
4459 inst_data = lu.cfg.GetAllInstancesInfo()
4461 for inst in inst_data.values():
4462 if inst.primary_node in node_to_primary:
4463 node_to_primary[inst.primary_node].add(inst.name)
4464 for secnode in inst.secondary_nodes:
4465 if secnode in node_to_secondary:
4466 node_to_secondary[secnode].add(inst.name)
4468 node_to_primary = None
4469 node_to_secondary = None
4471 if query.NQ_OOB in self.requested_data:
4472 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4473 for name, node in all_info.iteritems())
4477 if query.NQ_GROUP in self.requested_data:
4478 groups = lu.cfg.GetAllNodeGroupsInfo()
4482 return query.NodeQueryData([all_info[name] for name in nodenames],
4483 live_data, lu.cfg.GetMasterNode(),
4484 node_to_primary, node_to_secondary, groups,
4485 oob_support, lu.cfg.GetClusterInfo())
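# Quick reference for the optional data gathered above (driven by the
# requested query flags):
#   query.NQ_LIVE  -> live_data: per-node node_info RPC payloads
#   query.NQ_INST  -> node_to_primary / node_to_secondary instance-name sets
#   query.NQ_OOB   -> oob_support: whether each node has an OOB helper
#   query.NQ_GROUP -> groups: all node group objects, for group lookups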
4488 class LUNodeQuery(NoHooksLU):
4489 """Logical unit for querying nodes.
4492 # pylint: disable=W0142
4495 def CheckArguments(self):
4496 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4497 self.op.output_fields, self.op.use_locking)
4499 def ExpandNames(self):
4500 self.nq.ExpandNames(self)
4502 def Exec(self, feedback_fn):
4503 return self.nq.OldStyleQuery(self)
4506 class LUNodeQueryvols(NoHooksLU):
4507 """Logical unit for getting volumes on node(s).
4511 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4512 _FIELDS_STATIC = utils.FieldSet("node")
4514 def CheckArguments(self):
4515 _CheckOutputFields(static=self._FIELDS_STATIC,
4516 dynamic=self._FIELDS_DYNAMIC,
4517 selected=self.op.output_fields)
4519 def ExpandNames(self):
4520 self.needed_locks = {}
4521 self.share_locks[locking.LEVEL_NODE] = 1
4522 if not self.op.nodes:
4523 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4525 self.needed_locks[locking.LEVEL_NODE] = \
4526 _GetWantedNodes(self, self.op.nodes)
4528 def Exec(self, feedback_fn):
4529 """Computes the list of nodes and their attributes.
4532 nodenames = self.owned_locks(locking.LEVEL_NODE)
4533 volumes = self.rpc.call_node_volumes(nodenames)
4535 ilist = self.cfg.GetAllInstancesInfo()
4536 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4539 for node in nodenames:
4540 nresult = volumes[node]
4543 msg = nresult.fail_msg
4545 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4548 node_vols = sorted(nresult.payload,
4549 key=operator.itemgetter("dev"))
4551 for vol in node_vols:
4553 for field in self.op.output_fields:
4556 elif field == "phys":
4560 elif field == "name":
4562 elif field == "size":
4563 val = int(float(vol["size"]))
4564 elif field == "instance":
4565 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4567 raise errors.ParameterError(field)
4568 node_output.append(str(val))
4570 output.append(node_output)
4575 class LUNodeQueryStorage(NoHooksLU):
4576 """Logical unit for getting information on storage units on node(s).
4579 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4582 def CheckArguments(self):
4583 _CheckOutputFields(static=self._FIELDS_STATIC,
4584 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4585 selected=self.op.output_fields)
4587 def ExpandNames(self):
4588 self.needed_locks = {}
4589 self.share_locks[locking.LEVEL_NODE] = 1
4592 self.needed_locks[locking.LEVEL_NODE] = \
4593 _GetWantedNodes(self, self.op.nodes)
4595 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4597 def Exec(self, feedback_fn):
4598 """Computes the list of nodes and their attributes.
4601 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4603 # Always get name to sort by
4604 if constants.SF_NAME in self.op.output_fields:
4605 fields = self.op.output_fields[:]
4607 fields = [constants.SF_NAME] + self.op.output_fields
4609 # Never ask for node or type as it's only known to the LU
4610 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4611 while extra in fields:
4612 fields.remove(extra)
4614 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4615 name_idx = field_idx[constants.SF_NAME]
4617 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4618 data = self.rpc.call_storage_list(self.nodes,
4619 self.op.storage_type, st_args,
4620 self.op.name, fields)
4624 for node in utils.NiceSort(self.nodes):
4625 nresult = data[node]
4629 msg = nresult.fail_msg
4631 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4634 rows = dict([(row[name_idx], row) for row in nresult.payload])
4636 for name in utils.NiceSort(rows.keys()):
4641 for field in self.op.output_fields:
4642 if field == constants.SF_NODE:
4644 elif field == constants.SF_TYPE:
4645 val = self.op.storage_type
4646 elif field in field_idx:
4647 val = row[field_idx[field]]
4649 raise errors.ParameterError(field)
4658 class _InstanceQuery(_QueryBase):
4659 FIELDS = query.INSTANCE_FIELDS
4661 def ExpandNames(self, lu):
4662 lu.needed_locks = {}
4663 lu.share_locks = _ShareAll()
4666 self.wanted = _GetWantedInstances(lu, self.names)
4668 self.wanted = locking.ALL_SET
4670 self.do_locking = (self.use_locking and
4671 query.IQ_LIVE in self.requested_data)
4673 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4674 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4675 lu.needed_locks[locking.LEVEL_NODE] = []
4676 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4678 self.do_grouplocks = (self.do_locking and
4679 query.IQ_NODES in self.requested_data)
4681 def DeclareLocks(self, lu, level):
4683 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4684 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4686 # Lock all groups used by instances optimistically; this requires going
4687 # via the node before it's locked, requiring verification later on
4688 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4690 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4691 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4692 elif level == locking.LEVEL_NODE:
4693 lu._LockInstancesNodes() # pylint: disable=W0212
4696 def _CheckGroupLocks(lu):
4697 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4698 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4700 # Check if node groups for locked instances are still correct
4701 for instance_name in owned_instances:
4702 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4704 def _GetQueryData(self, lu):
4705 """Computes the list of instances and their attributes.
4708 if self.do_grouplocks:
4709 self._CheckGroupLocks(lu)
4711 cluster = lu.cfg.GetClusterInfo()
4712 all_info = lu.cfg.GetAllInstancesInfo()
4714 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4716 instance_list = [all_info[name] for name in instance_names]
4717 nodes = frozenset(itertools.chain(*(inst.all_nodes
4718 for inst in instance_list)))
4719 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4722 wrongnode_inst = set()
4724 # Gather data as requested
4725 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4727 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4729 result = node_data[name]
4731 # offline nodes will be in both lists
4732 assert result.fail_msg
4733 offline_nodes.append(name)
4735 bad_nodes.append(name)
4736 elif result.payload:
4737 for inst in result.payload:
4738 if inst in all_info:
4739 if all_info[inst].primary_node == name:
4740 live_data.update(result.payload)
4742 wrongnode_inst.add(inst)
4744 # orphan instance; we don't list it here as we don't
4745 # handle this case yet in the output of instance listing
4746 logging.warning("Orphan instance '%s' found on node %s",
4748 # else no instance is alive
4752 if query.IQ_DISKUSAGE in self.requested_data:
4753 disk_usage = dict((inst.name,
4754 _ComputeDiskSize(inst.disk_template,
4755 [{constants.IDISK_SIZE: disk.size}
4756 for disk in inst.disks]))
4757 for inst in instance_list)
4761 if query.IQ_CONSOLE in self.requested_data:
4763 for inst in instance_list:
4764 if inst.name in live_data:
4765 # Instance is running
4766 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4768 consinfo[inst.name] = None
4769 assert set(consinfo.keys()) == set(instance_names)
4773 if query.IQ_NODES in self.requested_data:
4774 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4776 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4777 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4778 for uuid in set(map(operator.attrgetter("group"),
4784 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4785 disk_usage, offline_nodes, bad_nodes,
4786 live_data, wrongnode_inst, consinfo,
4790 class LUQuery(NoHooksLU):
4791 """Query for resources/items of a certain kind.
4794 # pylint: disable=W0142
4797 def CheckArguments(self):
4798 qcls = _GetQueryImplementation(self.op.what)
4800 self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)
4802 def ExpandNames(self):
4803 self.impl.ExpandNames(self)
4805 def DeclareLocks(self, level):
4806 self.impl.DeclareLocks(self, level)
4808 def Exec(self, feedback_fn):
4809 return self.impl.NewStyleQuery(self)
4812 class LUQueryFields(NoHooksLU):
4813 """Query for resources/items of a certain kind.
4816 # pylint: disable=W0142
4819 def CheckArguments(self):
4820 self.qcls = _GetQueryImplementation(self.op.what)
4822 def ExpandNames(self):
4823 self.needed_locks = {}
4825 def Exec(self, feedback_fn):
4826 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4829 class LUNodeModifyStorage(NoHooksLU):
4830 """Logical unit for modifying a storage volume on a node.
4835 def CheckArguments(self):
4836 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4838 storage_type = self.op.storage_type
4841 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4843 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4844 " modified" % storage_type,
4847 diff = set(self.op.changes.keys()) - modifiable
4849 raise errors.OpPrereqError("The following fields can not be modified for"
4850 " storage units of type '%s': %r" %
4851 (storage_type, list(diff)),
4854 def ExpandNames(self):
4855 self.needed_locks = {
4856 locking.LEVEL_NODE: self.op.node_name,
4859 def Exec(self, feedback_fn):
4860 """Computes the list of nodes and their attributes.
4863 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4864 result = self.rpc.call_storage_modify(self.op.node_name,
4865 self.op.storage_type, st_args,
4866 self.op.name, self.op.changes)
4867 result.Raise("Failed to modify storage unit '%s' on %s" %
4868 (self.op.name, self.op.node_name))
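# Illustrative opcode usage (a sketch; assumes the LVM physical-volume
# storage type, whose only modifiable field is the allocatable flag):
#
#   opcodes.OpNodeModifyStorage(node_name="node1.example.com",
#                               storage_type=constants.ST_LVM_PV,
#                               name="/dev/sda3",
#                               changes={constants.SF_ALLOCATABLE: False})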
4871 class LUNodeAdd(LogicalUnit):
4872 """Logical unit for adding node to the cluster.
4876 HTYPE = constants.HTYPE_NODE
4877 _NFLAGS = ["master_capable", "vm_capable"]
4879 def CheckArguments(self):
4880 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4881 # validate/normalize the node name
4882 self.hostname = netutils.GetHostname(name=self.op.node_name,
4883 family=self.primary_ip_family)
4884 self.op.node_name = self.hostname.name
4886 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4887 raise errors.OpPrereqError("Cannot readd the master node",
4890 if self.op.readd and self.op.group:
4891 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4892 " being readded", errors.ECODE_INVAL)
4894 def BuildHooksEnv(self):
4897 This will run on all nodes before, and on all nodes + the new node after.
4901 "OP_TARGET": self.op.node_name,
4902 "NODE_NAME": self.op.node_name,
4903 "NODE_PIP": self.op.primary_ip,
4904 "NODE_SIP": self.op.secondary_ip,
4905 "MASTER_CAPABLE": str(self.op.master_capable),
4906 "VM_CAPABLE": str(self.op.vm_capable),
4909 def BuildHooksNodes(self):
4910 """Build hooks nodes.
4913 # Exclude added node
4914 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4915 post_nodes = pre_nodes + [self.op.node_name, ]
4917 return (pre_nodes, post_nodes)
4919 def CheckPrereq(self):
4920 """Check prerequisites.
4923 - the new node is not already in the config
4925 - its parameters (single/dual homed) match the cluster
4927 Any errors are signaled by raising errors.OpPrereqError.
4931 hostname = self.hostname
4932 node = hostname.name
4933 primary_ip = self.op.primary_ip = hostname.ip
4934 if self.op.secondary_ip is None:
4935 if self.primary_ip_family == netutils.IP6Address.family:
4936 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4937 " IPv4 address must be given as secondary",
4939 self.op.secondary_ip = primary_ip
4941 secondary_ip = self.op.secondary_ip
4942 if not netutils.IP4Address.IsValid(secondary_ip):
4943 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4944 " address" % secondary_ip, errors.ECODE_INVAL)
4946 node_list = cfg.GetNodeList()
4947 if not self.op.readd and node in node_list:
4948 raise errors.OpPrereqError("Node %s is already in the configuration" %
4949 node, errors.ECODE_EXISTS)
4950 elif self.op.readd and node not in node_list:
4951 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4954 self.changed_primary_ip = False
4956 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4957 if self.op.readd and node == existing_node_name:
4958 if existing_node.secondary_ip != secondary_ip:
4959 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4960 " address configuration as before",
4962 if existing_node.primary_ip != primary_ip:
4963 self.changed_primary_ip = True
4967 if (existing_node.primary_ip == primary_ip or
4968 existing_node.secondary_ip == primary_ip or
4969 existing_node.primary_ip == secondary_ip or
4970 existing_node.secondary_ip == secondary_ip):
4971 raise errors.OpPrereqError("New node ip address(es) conflict with"
4972 " existing node %s" % existing_node.name,
4973 errors.ECODE_NOTUNIQUE)
4975 # After this 'if' block, None is no longer a valid value for the
4976 # _capable op attributes
4978 old_node = self.cfg.GetNodeInfo(node)
4979 assert old_node is not None, "Can't retrieve locked node %s" % node
4980 for attr in self._NFLAGS:
4981 if getattr(self.op, attr) is None:
4982 setattr(self.op, attr, getattr(old_node, attr))
4984 for attr in self._NFLAGS:
4985 if getattr(self.op, attr) is None:
4986 setattr(self.op, attr, True)
4988 if self.op.readd and not self.op.vm_capable:
4989 pri, sec = cfg.GetNodeInstances(node)
4991 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4992 " flag set to false, but it already holds"
4993 " instances" % node,
4996 # check that the type of the node (single versus dual homed) is the
4997 # same as for the master
4998 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4999 master_singlehomed = myself.secondary_ip == myself.primary_ip
5000 newbie_singlehomed = secondary_ip == primary_ip
5001 if master_singlehomed != newbie_singlehomed:
5002 if master_singlehomed:
5003 raise errors.OpPrereqError("The master has no secondary ip but the"
5004 " new node has one",
5007 raise errors.OpPrereqError("The master has a secondary ip but the"
5008 " new node doesn't have one",
5011 # checks reachability
5012 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5013 raise errors.OpPrereqError("Node not reachable by ping",
5014 errors.ECODE_ENVIRON)
5016 if not newbie_singlehomed:
5017 # check reachability from my secondary ip to newbie's secondary ip
5018 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5019 source=myself.secondary_ip):
5020 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5021 " based ping to node daemon port",
5022 errors.ECODE_ENVIRON)
5029 if self.op.master_capable:
5030 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5032 self.master_candidate = False
5035 self.new_node = old_node
5037 node_group = cfg.LookupNodeGroup(self.op.group)
5038 self.new_node = objects.Node(name=node,
5039 primary_ip=primary_ip,
5040 secondary_ip=secondary_ip,
5041 master_candidate=self.master_candidate,
5042 offline=False, drained=False,
5045 if self.op.ndparams:
5046 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5048 def Exec(self, feedback_fn):
5049 """Adds the new node to the cluster.
5052 new_node = self.new_node
5053 node = new_node.name
5055 # We are adding a new node, so we assume it's powered
5056 new_node.powered = True
5058 # for re-adds, reset the offline/drained/master-candidate flags;
5059 # we need to reset here, otherwise offline would prevent RPC calls
5060 # later in the procedure; this also means that if the re-add
5061 # fails, we are left with a non-offlined, broken node
5063 new_node.drained = new_node.offline = False # pylint: disable=W0201
5064 self.LogInfo("Readding a node, the offline/drained flags were reset")
5065 # if we demote the node, we do cleanup later in the procedure
5066 new_node.master_candidate = self.master_candidate
5067 if self.changed_primary_ip:
5068 new_node.primary_ip = self.op.primary_ip
5070 # copy the master/vm_capable flags
5071 for attr in self._NFLAGS:
5072 setattr(new_node, attr, getattr(self.op, attr))
5074 # notify the user about any possible mc promotion
5075 if new_node.master_candidate:
5076 self.LogInfo("Node will be a master candidate")
5078 if self.op.ndparams:
5079 new_node.ndparams = self.op.ndparams
5081 new_node.ndparams = {}
5083 # check connectivity
5084 result = self.rpc.call_version([node])[node]
5085 result.Raise("Can't get version information from node %s" % node)
5086 if constants.PROTOCOL_VERSION == result.payload:
5087 logging.info("Communication to node %s fine, sw version %s match",
5088 node, result.payload)
5090 raise errors.OpExecError("Version mismatch master version %s,"
5091 " node version %s" %
5092 (constants.PROTOCOL_VERSION, result.payload))
5094 # Add node to our /etc/hosts, and add key to known_hosts
5095 if self.cfg.GetClusterInfo().modify_etc_hosts:
5096 master_node = self.cfg.GetMasterNode()
5097 result = self.rpc.call_etc_hosts_modify(master_node,
5098 constants.ETC_HOSTS_ADD,
5101 result.Raise("Can't update hosts file with new host data")
5103 if new_node.secondary_ip != new_node.primary_ip:
5104 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5107 node_verify_list = [self.cfg.GetMasterNode()]
5108 node_verify_param = {
5109 constants.NV_NODELIST: ([node], {}),
5110 # TODO: do a node-net-test as well?
5113 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5114 self.cfg.GetClusterName())
5115 for verifier in node_verify_list:
5116 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5117 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5119 for failed in nl_payload:
5120 feedback_fn("ssh/hostname verification failed"
5121 " (checking from %s): %s" %
5122 (verifier, nl_payload[failed]))
5123 raise errors.OpExecError("ssh/hostname verification failed")
5126 _RedistributeAncillaryFiles(self)
5127 self.context.ReaddNode(new_node)
5128 # make sure we redistribute the config
5129 self.cfg.Update(new_node, feedback_fn)
5130 # and make sure the new node will not have old files around
5131 if not new_node.master_candidate:
5132 result = self.rpc.call_node_demote_from_mc(new_node.name)
5133 msg = result.fail_msg
5135 self.LogWarning("Node failed to demote itself from master"
5136 " candidate status: %s" % msg)
5138 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5139 additional_vm=self.op.vm_capable)
5140 self.context.AddNode(new_node, self.proc.GetECId())
5143 class LUNodeSetParams(LogicalUnit):
5144 """Modifies the parameters of a node.
5146 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5147 to the node role (as _ROLE_*)
5148 @cvar _R2F: a dictionary from node role to tuples of flags
5149 @cvar _FLAGS: a list of attribute names corresponding to the flags
5152 HPATH = "node-modify"
5153 HTYPE = constants.HTYPE_NODE
5155 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5157 (True, False, False): _ROLE_CANDIDATE,
5158 (False, True, False): _ROLE_DRAINED,
5159 (False, False, True): _ROLE_OFFLINE,
5160 (False, False, False): _ROLE_REGULAR,
5162 _R2F = dict((v, k) for k, v in _F2R.items())
5163 _FLAGS = ["master_candidate", "drained", "offline"]
5165 def CheckArguments(self):
5166 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5167 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5168 self.op.master_capable, self.op.vm_capable,
5169 self.op.secondary_ip, self.op.ndparams]
5170 if all_mods.count(None) == len(all_mods):
5171 raise errors.OpPrereqError("Please pass at least one modification",
5173 if all_mods.count(True) > 1:
5174 raise errors.OpPrereqError("Can't set the node into more than one"
5175 " state at the same time",
5178 # Boolean value that tells us whether we might be demoting from MC
5179 self.might_demote = (self.op.master_candidate == False or
5180 self.op.offline == True or
5181 self.op.drained == True or
5182 self.op.master_capable == False)
5184 if self.op.secondary_ip:
5185 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5186 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5187 " address" % self.op.secondary_ip,
5190 self.lock_all = self.op.auto_promote and self.might_demote
5191 self.lock_instances = self.op.secondary_ip is not None
5193 def ExpandNames(self):
5195 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5197 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5199 if self.lock_instances:
5200 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5202 def DeclareLocks(self, level):
5203 # If we have locked all instances, before waiting to lock nodes, release
5204 # all the ones living on nodes unrelated to the current operation.
5205 if level == locking.LEVEL_NODE and self.lock_instances:
5206 self.affected_instances = []
5207 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5210 # Build list of instances to release
5211 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5212 for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5213 if (instance.disk_template in constants.DTS_INT_MIRROR and
5214 self.op.node_name in instance.all_nodes):
5215 instances_keep.append(instance_name)
5216 self.affected_instances.append(instance)
5218 _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5220 assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5221 set(instances_keep))
5223 def BuildHooksEnv(self):
5226 This runs on the master node.
5230 "OP_TARGET": self.op.node_name,
5231 "MASTER_CANDIDATE": str(self.op.master_candidate),
5232 "OFFLINE": str(self.op.offline),
5233 "DRAINED": str(self.op.drained),
5234 "MASTER_CAPABLE": str(self.op.master_capable),
5235 "VM_CAPABLE": str(self.op.vm_capable),
5238 def BuildHooksNodes(self):
5239 """Build hooks nodes.
5242 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5245 def CheckPrereq(self):
5246 """Check prerequisites.
5248 This only checks the instance list against the existing names.
5251 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5253 if (self.op.master_candidate is not None or
5254 self.op.drained is not None or
5255 self.op.offline is not None):
5256 # we can't change the master's node flags
5257 if self.op.node_name == self.cfg.GetMasterNode():
5258 raise errors.OpPrereqError("The master role can be changed"
5259 " only via master-failover",
5262 if self.op.master_candidate and not node.master_capable:
5263 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5264 " it a master candidate" % node.name,
5267 if self.op.vm_capable == False:
5268 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5270 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5271 " the vm_capable flag" % node.name,
5274 if node.master_candidate and self.might_demote and not self.lock_all:
5275 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5276 # check if after removing the current node, we're missing master
5278 (mc_remaining, mc_should, _) = \
5279 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5280 if mc_remaining < mc_should:
5281 raise errors.OpPrereqError("Not enough master candidates, please"
5282 " pass auto promote option to allow"
5283 " promotion", errors.ECODE_STATE)
5285 self.old_flags = old_flags = (node.master_candidate,
5286 node.drained, node.offline)
5287 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5288 self.old_role = old_role = self._F2R[old_flags]
5290 # Check for ineffective changes
5291 for attr in self._FLAGS:
5292 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5293 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5294 setattr(self.op, attr, None)
5296 # Past this point, any flag change to False means a transition
5297 # away from the respective state, as only real changes are kept
5299 # TODO: We might query the real power state if it supports OOB
5300 if _SupportsOob(self.cfg, node):
5301 if self.op.offline is False and not (node.powered or
5302 self.op.powered == True):
5303 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5304 " offline status can be reset") %
5306 elif self.op.powered is not None:
5307 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5308 " as it does not support out-of-band"
5309 " handling") % self.op.node_name)
5311 # If we're being deofflined/drained, we'll MC ourself if needed
5312 if (self.op.drained == False or self.op.offline == False or
5313 (self.op.master_capable and not node.master_capable)):
5314 if _DecideSelfPromotion(self):
5315 self.op.master_candidate = True
5316 self.LogInfo("Auto-promoting node to master candidate")
5318 # If we're no longer master capable, we'll demote ourselves from MC
5319 if self.op.master_capable == False and node.master_candidate:
5320 self.LogInfo("Demoting from master candidate")
5321 self.op.master_candidate = False
5324 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5325 if self.op.master_candidate:
5326 new_role = self._ROLE_CANDIDATE
5327 elif self.op.drained:
5328 new_role = self._ROLE_DRAINED
5329 elif self.op.offline:
5330 new_role = self._ROLE_OFFLINE
5331 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5332 # False is still in new flags, which means we're un-setting (the
5334 new_role = self._ROLE_REGULAR
5335 else: # no new flags, nothing, keep old role
5338 self.new_role = new_role
5340 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5341 # Trying to transition out of offline status
5342 result = self.rpc.call_version([node.name])[node.name]
5344 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5345 " to report its version: %s" %
5346 (node.name, result.fail_msg),
5349 self.LogWarning("Transitioning node from offline to online state"
5350 " without using re-add. Please make sure the node"
5353 if self.op.secondary_ip:
5354 # Ok even without locking, because this can't be changed by any LU
5355 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5356 master_singlehomed = master.secondary_ip == master.primary_ip
5357 if master_singlehomed and self.op.secondary_ip:
5358 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5359 " homed cluster", errors.ECODE_INVAL)
5362 if self.affected_instances:
5363 raise errors.OpPrereqError("Cannot change secondary ip: offline"
5364 " node has instances (%s) configured"
5365 " to use it" % self.affected_instances)
5367 # On online nodes, check that no instances are running, and that
5368 # the node has the new ip and we can reach it.
5369 for instance in self.affected_instances:
5370 _CheckInstanceDown(self, instance, "cannot change secondary ip")
5372 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5373 if master.name != node.name:
5374 # check reachability from master secondary ip to new secondary ip
5375 if not netutils.TcpPing(self.op.secondary_ip,
5376 constants.DEFAULT_NODED_PORT,
5377 source=master.secondary_ip):
5378 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5379 " based ping to node daemon port",
5380 errors.ECODE_ENVIRON)
5382 if self.op.ndparams:
5383 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5384 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5385 self.new_ndparams = new_ndparams
5387 def Exec(self, feedback_fn):
5392 old_role = self.old_role
5393 new_role = self.new_role
5397 if self.op.ndparams:
5398 node.ndparams = self.new_ndparams
5400 if self.op.powered is not None:
5401 node.powered = self.op.powered
5403 for attr in ["master_capable", "vm_capable"]:
5404 val = getattr(self.op, attr)
5406 setattr(node, attr, val)
5407 result.append((attr, str(val)))
5409 if new_role != old_role:
5410 # Tell the node to demote itself, if no longer MC and not offline
5411 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5412 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5414 self.LogWarning("Node failed to demote itself: %s", msg)
5416 new_flags = self._R2F[new_role]
5417 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5419 result.append((desc, str(nf)))
5420 (node.master_candidate, node.drained, node.offline) = new_flags
5422 # we locked all nodes, so we adjust the candidate pool before updating this node
5424 _AdjustCandidatePool(self, [node.name])
5426 if self.op.secondary_ip:
5427 node.secondary_ip = self.op.secondary_ip
5428 result.append(("secondary_ip", self.op.secondary_ip))
5430 # this will trigger configuration file update, if needed
5431 self.cfg.Update(node, feedback_fn)
5433 # this will trigger job queue propagation or cleanup if the mc
5435 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5436 self.context.ReaddNode(node)
5441 class LUNodePowercycle(NoHooksLU):
5442 """Powercycles a node.
5447 def CheckArguments(self):
5448 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5449 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5450 raise errors.OpPrereqError("The node is the master and the force"
5451 " parameter was not set",
5454 def ExpandNames(self):
5455 """Locking for PowercycleNode.
5457 This is a last-resort option and shouldn't block on other
5458 jobs. Therefore, we grab no locks.
5461 self.needed_locks = {}
5463 def Exec(self, feedback_fn):
5467 result = self.rpc.call_node_powercycle(self.op.node_name,
5468 self.cfg.GetHypervisorType())
5469 result.Raise("Failed to schedule the reboot")
5470 return result.payload
5473 class LUClusterQuery(NoHooksLU):
5474 """Query cluster configuration.
5479 def ExpandNames(self):
5480 self.needed_locks = {}
5482 def Exec(self, feedback_fn):
5483 """Return cluster config.
5486 cluster = self.cfg.GetClusterInfo()
5489 # Filter just for enabled hypervisors
5490 for os_name, hv_dict in cluster.os_hvp.items():
5491 os_hvp[os_name] = {}
5492 for hv_name, hv_params in hv_dict.items():
5493 if hv_name in cluster.enabled_hypervisors:
5494 os_hvp[os_name][hv_name] = hv_params
5496 # Convert ip_family to ip_version
5497 primary_ip_version = constants.IP4_VERSION
5498 if cluster.primary_ip_family == netutils.IP6Address.family:
5499 primary_ip_version = constants.IP6_VERSION
5502 "software_version": constants.RELEASE_VERSION,
5503 "protocol_version": constants.PROTOCOL_VERSION,
5504 "config_version": constants.CONFIG_VERSION,
5505 "os_api_version": max(constants.OS_API_VERSIONS),
5506 "export_version": constants.EXPORT_VERSION,
5507 "architecture": (platform.architecture()[0], platform.machine()),
5508 "name": cluster.cluster_name,
5509 "master": cluster.master_node,
5510 "default_hypervisor": cluster.enabled_hypervisors[0],
5511 "enabled_hypervisors": cluster.enabled_hypervisors,
5512 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5513 for hypervisor_name in cluster.enabled_hypervisors]),
5515 "beparams": cluster.beparams,
5516 "osparams": cluster.osparams,
5517 "nicparams": cluster.nicparams,
5518 "ndparams": cluster.ndparams,
5519 "candidate_pool_size": cluster.candidate_pool_size,
5520 "master_netdev": cluster.master_netdev,
5521 "volume_group_name": cluster.volume_group_name,
5522 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5523 "file_storage_dir": cluster.file_storage_dir,
5524 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5525 "maintain_node_health": cluster.maintain_node_health,
5526 "ctime": cluster.ctime,
5527 "mtime": cluster.mtime,
5528 "uuid": cluster.uuid,
5529 "tags": list(cluster.GetTags()),
5530 "uid_pool": cluster.uid_pool,
5531 "default_iallocator": cluster.default_iallocator,
5532 "reserved_lvs": cluster.reserved_lvs,
5533 "primary_ip_version": primary_ip_version,
5534 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5535 "hidden_os": cluster.hidden_os,
5536 "blacklisted_os": cluster.blacklisted_os,
5542 class LUClusterConfigQuery(NoHooksLU):
5543 """Return configuration values.
5547 _FIELDS_DYNAMIC = utils.FieldSet()
5548 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5549 "watcher_pause", "volume_group_name")
5551 def CheckArguments(self):
5552 _CheckOutputFields(static=self._FIELDS_STATIC,
5553 dynamic=self._FIELDS_DYNAMIC,
5554 selected=self.op.output_fields)
5556 def ExpandNames(self):
5557 self.needed_locks = {}
5559 def Exec(self, feedback_fn):
5560 """Dump a representation of the cluster config to the standard output.
5564 for field in self.op.output_fields:
5565 if field == "cluster_name":
5566 entry = self.cfg.GetClusterName()
5567 elif field == "master_node":
5568 entry = self.cfg.GetMasterNode()
5569 elif field == "drain_flag":
5570 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5571 elif field == "watcher_pause":
5572 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5573 elif field == "volume_group_name":
5574 entry = self.cfg.GetVGName()
5576 raise errors.ParameterError(field)
5577 values.append(entry)
5581 class LUInstanceActivateDisks(NoHooksLU):
5582 """Bring up an instance's disks.
5587 def ExpandNames(self):
5588 self._ExpandAndLockInstance()
5589 self.needed_locks[locking.LEVEL_NODE] = []
5590 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5592 def DeclareLocks(self, level):
5593 if level == locking.LEVEL_NODE:
5594 self._LockInstancesNodes()
5596 def CheckPrereq(self):
5597 """Check prerequisites.
5599 This checks that the instance is in the cluster.
5602 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5603 assert self.instance is not None, \
5604 "Cannot retrieve locked instance %s" % self.op.instance_name
5605 _CheckNodeOnline(self, self.instance.primary_node)
5607 def Exec(self, feedback_fn):
5608 """Activate the disks.
5611 disks_ok, disks_info = \
5612 _AssembleInstanceDisks(self, self.instance,
5613 ignore_size=self.op.ignore_size)
5615 raise errors.OpExecError("Cannot activate block devices")
5620 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5622 """Prepare the block devices for an instance.
5624 This sets up the block devices on all nodes.
5626 @type lu: L{LogicalUnit}
5627 @param lu: the logical unit on whose behalf we execute
5628 @type instance: L{objects.Instance}
5629 @param instance: the instance for whose disks we assemble
5630 @type disks: list of L{objects.Disk} or None
5631 @param disks: which disks to assemble (or all, if None)
5632 @type ignore_secondaries: boolean
5633 @param ignore_secondaries: if true, errors on secondary nodes
5634 won't result in an error return from the function
5635 @type ignore_size: boolean
5636 @param ignore_size: if true, the current known size of the disk
5637 will not be used during the disk activation, useful for cases
5638 when the size is wrong
5639 @return: False if the operation failed, otherwise a list of
5640 (host, instance_visible_name, node_visible_name)
5641 with the mapping from node devices to instance devices
5646 iname = instance.name
5647 disks = _ExpandCheckDisks(instance, disks)
5649 # With the two-pass mechanism we try to reduce the window of
5650 # opportunity for the race condition of switching DRBD to primary
5651 # before the handshake has occurred, but we do not eliminate it
5653 # The proper fix would be to wait (with some limits) until the
5654 # connection has been made and drbd transitions from WFConnection
5655 # into any other network-connected state (Connected, SyncTarget,
5658 # 1st pass, assemble on all nodes in secondary mode
5659 for idx, inst_disk in enumerate(disks):
5660 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5662 node_disk = node_disk.Copy()
5663 node_disk.UnsetSize()
5664 lu.cfg.SetDiskID(node_disk, node)
5665 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5666 msg = result.fail_msg
5668 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5669 " (is_primary=False, pass=1): %s",
5670 inst_disk.iv_name, node, msg)
5671 if not ignore_secondaries:
5674 # FIXME: race condition on drbd migration to primary
5676 # 2nd pass, do only the primary node
5677 for idx, inst_disk in enumerate(disks):
5680 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5681 if node != instance.primary_node:
5684 node_disk = node_disk.Copy()
5685 node_disk.UnsetSize()
5686 lu.cfg.SetDiskID(node_disk, node)
5687 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5688 msg = result.fail_msg
5690 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5691 " (is_primary=True, pass=2): %s",
5692 inst_disk.iv_name, node, msg)
5695 dev_path = result.payload
5697 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5699 # leave the disks configured for the primary node
5700 # this is a workaround that would be fixed better by
5701 # improving the logical/physical id handling
5703 lu.cfg.SetDiskID(disk, instance.primary_node)
5705 return disks_ok, device_info
5708 def _StartInstanceDisks(lu, instance, force):
5709 """Start the disks of an instance.
5712 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5713 ignore_secondaries=force)
5715 _ShutdownInstanceDisks(lu, instance)
5716 if force is not None and not force:
5717 lu.proc.LogWarning("", hint="If the message above refers to a"
5719 " secondary node, you can retry the operation using '--force'.")
5720 raise errors.OpExecError("Disk consistency error")
5723 class LUInstanceDeactivateDisks(NoHooksLU):
5724 """Shutdown an instance's disks.
5729 def ExpandNames(self):
5730 self._ExpandAndLockInstance()
5731 self.needed_locks[locking.LEVEL_NODE] = []
5732 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5734 def DeclareLocks(self, level):
5735 if level == locking.LEVEL_NODE:
5736 self._LockInstancesNodes()
5738 def CheckPrereq(self):
5739 """Check prerequisites.
5741 This checks that the instance is in the cluster.
5744 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5745 assert self.instance is not None, \
5746 "Cannot retrieve locked instance %s" % self.op.instance_name
5748 def Exec(self, feedback_fn):
5749 """Deactivate the disks
5752 instance = self.instance
5754 _ShutdownInstanceDisks(self, instance)
5756 _SafeShutdownInstanceDisks(self, instance)
5759 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5760 """Shutdown block devices of an instance.
5762 This function checks if an instance is running, before calling
5763 _ShutdownInstanceDisks.
5766 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5767 _ShutdownInstanceDisks(lu, instance, disks=disks)
5770 def _ExpandCheckDisks(instance, disks):
5771 """Return the instance disks selected by the disks list
5773 @type disks: list of L{objects.Disk} or None
5774 @param disks: selected disks
5775 @rtype: list of L{objects.Disk}
5776 @return: selected instance disks to act on
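Illustrative behaviour, assuming inst is an L{objects.Instance} with two
disks (the variable names are assumptions for this sketch):

  _ExpandCheckDisks(inst, None)             # -> inst.disks (all disks)
  _ExpandCheckDisks(inst, [inst.disks[0]])  # -> only the first disk
  # a disk not belonging to inst raises errors.ProgrammerError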
5780 return instance.disks
5782 if not set(disks).issubset(instance.disks):
5783 raise errors.ProgrammerError("Can only act on disks belonging to the"
5788 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5789 """Shutdown block devices of an instance.
5791 This does the shutdown on all nodes of the instance.
5793 If ignore_primary is false, errors on the primary node are ignored.
5798 disks = _ExpandCheckDisks(instance, disks)
5801 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5802 lu.cfg.SetDiskID(top_disk, node)
5803 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5804 msg = result.fail_msg
5806 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5807 disk.iv_name, node, msg)
5808 if ((node == instance.primary_node and not ignore_primary) or
5809 (node != instance.primary_node and not result.offline)):
5814 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5815 """Checks if a node has enough free memory.
5817 This function checks if a given node has the needed amount of free
5818 memory. In case the node has less memory or we cannot get the
5819 information from the node, this function raises an OpPrereqError exception.
5822 @type lu: C{LogicalUnit}
5823 @param lu: a logical unit from which we get configuration data
5825 @param node: the node to check
5826 @type reason: C{str}
5827 @param reason: string to use in the error message
5828 @type requested: C{int}
5829 @param requested: the amount of memory in MiB to check for
5830 @type hypervisor_name: C{str}
5831 @param hypervisor_name: the hypervisor to ask for memory stats
5832 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5833 we cannot check the node
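A sketch of the typical call, mirroring the use in LUInstanceStartup below
(the reason string and memory amount are illustrative):

  _CheckNodeFreeMemory(self, instance.primary_node,
                       "starting instance %s" % instance.name,
                       bep[constants.BE_MEMORY], instance.hypervisor)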
5836 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5837 nodeinfo[node].Raise("Can't get data from node %s" % node,
5838 prereq=True, ecode=errors.ECODE_ENVIRON)
5839 free_mem = nodeinfo[node].payload.get("memory_free", None)
5840 if not isinstance(free_mem, int):
5841 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5842 " was '%s'" % (node, free_mem),
5843 errors.ECODE_ENVIRON)
5844 if requested > free_mem:
5845 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5846 " needed %s MiB, available %s MiB" %
5847 (node, reason, requested, free_mem),
5851 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5852 """Checks if nodes have enough free disk space in all VGs.
5854 This function checks if all given nodes have the needed amount of
5855 free disk. In case any node has less disk or we cannot get the
5856 information from the node, this function raises an OpPrereqError exception.
5859 @type lu: C{LogicalUnit}
5860 @param lu: a logical unit from which we get configuration data
5861 @type nodenames: C{list}
5862 @param nodenames: the list of node names to check
5863 @type req_sizes: C{dict}
5864 @param req_sizes: the hash of vg and corresponding amount of disk in MiB to check for
5866 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5867 or we cannot check the node
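A minimal sketch of the expected req_sizes layout; the VG names and sizes
are illustrative assumptions:

  req_sizes = {"xenvg": 10240, "fastvg": 2048}  # MiB required per VG
  _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
  # each (vg, size) pair is checked via _CheckNodesFreeDiskOnVG below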
5870 for vg, req_size in req_sizes.items():
5871 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5874 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5875 """Checks if nodes have enough free disk space in the specified VG.
5877 This function checks if all given nodes have the needed amount of
5878 free disk. In case any node has less disk or we cannot get the
5879 information from the node, this function raises an OpPrereqError exception.
5882 @type lu: C{LogicalUnit}
5883 @param lu: a logical unit from which we get configuration data
5884 @type nodenames: C{list}
5885 @param nodenames: the list of node names to check
5887 @param vg: the volume group to check
5888 @type requested: C{int}
5889 @param requested: the amount of disk in MiB to check for
5890 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5891 or we cannot check the node
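Example call shape; the node list, VG name and size are illustrative
assumptions:

  _CheckNodesFreeDiskOnVG(self, ["node1", "node2"], "xenvg", 10240)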
5894 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5895 for node in nodenames:
5896 info = nodeinfo[node]
5897 info.Raise("Cannot get current information from node %s" % node,
5898 prereq=True, ecode=errors.ECODE_ENVIRON)
5899 vg_free = info.payload.get("vg_free", None)
5900 if not isinstance(vg_free, int):
5901 raise errors.OpPrereqError("Can't compute free disk space on node"
5902 " %s for vg %s, result was '%s'" %
5903 (node, vg, vg_free), errors.ECODE_ENVIRON)
5904 if requested > vg_free:
5905 raise errors.OpPrereqError("Not enough disk space on target node %s"
5906 " vg %s: required %d MiB, available %d MiB" %
5907 (node, vg, requested, vg_free),
5911 class LUInstanceStartup(LogicalUnit):
5912 """Starts an instance.
5915 HPATH = "instance-start"
5916 HTYPE = constants.HTYPE_INSTANCE
5919 def CheckArguments(self):
5921 if self.op.beparams:
5922 # fill the beparams dict
5923 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5925 def ExpandNames(self):
5926 self._ExpandAndLockInstance()
5928 def BuildHooksEnv(self):
5931 This runs on master, primary and secondary nodes of the instance.
5935 "FORCE": self.op.force,
5938 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5942 def BuildHooksNodes(self):
5943 """Build hooks nodes.
5946 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5949 def CheckPrereq(self):
5950 """Check prerequisites.
5952 This checks that the instance is in the cluster.
5955 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5956 assert self.instance is not None, \
5957 "Cannot retrieve locked instance %s" % self.op.instance_name
5960 if self.op.hvparams:
5961 # check hypervisor parameter syntax (locally)
5962 cluster = self.cfg.GetClusterInfo()
5963 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5964 filled_hvp = cluster.FillHV(instance)
5965 filled_hvp.update(self.op.hvparams)
5966 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5967 hv_type.CheckParameterSyntax(filled_hvp)
5968 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5970 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5972 if self.primary_offline and self.op.ignore_offline_nodes:
5973 self.proc.LogWarning("Ignoring offline primary node")
5975 if self.op.hvparams or self.op.beparams:
5976 self.proc.LogWarning("Overridden parameters are ignored")
5978 _CheckNodeOnline(self, instance.primary_node)
5980 bep = self.cfg.GetClusterInfo().FillBE(instance)
5982 # check bridges existence
5983 _CheckInstanceBridgesExist(self, instance)
5985 remote_info = self.rpc.call_instance_info(instance.primary_node,
5987 instance.hypervisor)
5988 remote_info.Raise("Error checking node %s" % instance.primary_node,
5989 prereq=True, ecode=errors.ECODE_ENVIRON)
5990 if not remote_info.payload: # not running already
5991 _CheckNodeFreeMemory(self, instance.primary_node,
5992 "starting instance %s" % instance.name,
5993 bep[constants.BE_MEMORY], instance.hypervisor)
5995 def Exec(self, feedback_fn):
5996 """Start the instance.
5999 instance = self.instance
6000 force = self.op.force
6002 if not self.op.no_remember:
6003 self.cfg.MarkInstanceUp(instance.name)
6005 if self.primary_offline:
6006 assert self.op.ignore_offline_nodes
6007 self.proc.LogInfo("Primary node offline, marked instance as started")
6009 node_current = instance.primary_node
6011 _StartInstanceDisks(self, instance, force)
6013 result = self.rpc.call_instance_start(node_current, instance,
6014 self.op.hvparams, self.op.beparams,
6015 self.op.startup_paused)
6016 msg = result.fail_msg
6018 _ShutdownInstanceDisks(self, instance)
6019 raise errors.OpExecError("Could not start instance: %s" % msg)
6022 class LUInstanceReboot(LogicalUnit):
6023 """Reboot an instance.
6026 HPATH = "instance-reboot"
6027 HTYPE = constants.HTYPE_INSTANCE
6030 def ExpandNames(self):
6031 self._ExpandAndLockInstance()
6033 def BuildHooksEnv(self):
6036 This runs on master, primary and secondary nodes of the instance.
6040 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6041 "REBOOT_TYPE": self.op.reboot_type,
6042 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6045 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6049 def BuildHooksNodes(self):
6050 """Build hooks nodes.
6053 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6056 def CheckPrereq(self):
6057 """Check prerequisites.
6059 This checks that the instance is in the cluster.
6062 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6063 assert self.instance is not None, \
6064 "Cannot retrieve locked instance %s" % self.op.instance_name
6066 _CheckNodeOnline(self, instance.primary_node)
6068 # check bridges existence
6069 _CheckInstanceBridgesExist(self, instance)
6071 def Exec(self, feedback_fn):
6072 """Reboot the instance.
6075 instance = self.instance
6076 ignore_secondaries = self.op.ignore_secondaries
6077 reboot_type = self.op.reboot_type
6079 remote_info = self.rpc.call_instance_info(instance.primary_node,
6081 instance.hypervisor)
6082 remote_info.Raise("Error checking node %s" % instance.primary_node)
6083 instance_running = bool(remote_info.payload)
6085 node_current = instance.primary_node
6087 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6088 constants.INSTANCE_REBOOT_HARD]:
6089 for disk in instance.disks:
6090 self.cfg.SetDiskID(disk, node_current)
6091 result = self.rpc.call_instance_reboot(node_current, instance,
6093 self.op.shutdown_timeout)
6094 result.Raise("Could not reboot instance")
6096 if instance_running:
6097 result = self.rpc.call_instance_shutdown(node_current, instance,
6098 self.op.shutdown_timeout)
6099 result.Raise("Could not shutdown instance for full reboot")
6100 _ShutdownInstanceDisks(self, instance)
6102 self.LogInfo("Instance %s was already stopped, starting now",
6104 _StartInstanceDisks(self, instance, ignore_secondaries)
6105 result = self.rpc.call_instance_start(node_current, instance,
6107 msg = result.fail_msg
6109 _ShutdownInstanceDisks(self, instance)
6110 raise errors.OpExecError("Could not start instance for"
6111 " full reboot: %s" % msg)
6113 self.cfg.MarkInstanceUp(instance.name)
6116 class LUInstanceShutdown(LogicalUnit):
6117 """Shutdown an instance.
6120 HPATH = "instance-stop"
6121 HTYPE = constants.HTYPE_INSTANCE
6124 def ExpandNames(self):
6125 self._ExpandAndLockInstance()
6127 def BuildHooksEnv(self):
6130 This runs on master, primary and secondary nodes of the instance.
6133 env = _BuildInstanceHookEnvByObject(self, self.instance)
6134 env["TIMEOUT"] = self.op.timeout
6137 def BuildHooksNodes(self):
6138 """Build hooks nodes.
6141 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6144 def CheckPrereq(self):
6145 """Check prerequisites.
6147 This checks that the instance is in the cluster.
6150 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6151 assert self.instance is not None, \
6152 "Cannot retrieve locked instance %s" % self.op.instance_name
6154 self.primary_offline = \
6155 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6157 if self.primary_offline and self.op.ignore_offline_nodes:
6158 self.proc.LogWarning("Ignoring offline primary node")
6160 _CheckNodeOnline(self, self.instance.primary_node)
6162 def Exec(self, feedback_fn):
6163 """Shutdown the instance.
6166 instance = self.instance
6167 node_current = instance.primary_node
6168 timeout = self.op.timeout
6170 if not self.op.no_remember:
6171 self.cfg.MarkInstanceDown(instance.name)
6173 if self.primary_offline:
6174 assert self.op.ignore_offline_nodes
6175 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6177 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6178 msg = result.fail_msg
6180 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6182 _ShutdownInstanceDisks(self, instance)
6185 class LUInstanceReinstall(LogicalUnit):
6186 """Reinstall an instance.
6189 HPATH = "instance-reinstall"
6190 HTYPE = constants.HTYPE_INSTANCE
6193 def ExpandNames(self):
6194 self._ExpandAndLockInstance()
6196 def BuildHooksEnv(self):
6199 This runs on master, primary and secondary nodes of the instance.
6202 return _BuildInstanceHookEnvByObject(self, self.instance)
6204 def BuildHooksNodes(self):
6205 """Build hooks nodes.
6208 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6211 def CheckPrereq(self):
6212 """Check prerequisites.
6214 This checks that the instance is in the cluster and is not running.
6217 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6218 assert instance is not None, \
6219 "Cannot retrieve locked instance %s" % self.op.instance_name
6220 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6221 " offline, cannot reinstall")
6222 for node in instance.secondary_nodes:
6223 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6224 " cannot reinstall")
6226 if instance.disk_template == constants.DT_DISKLESS:
6227 raise errors.OpPrereqError("Instance '%s' has no disks" %
6228 self.op.instance_name,
6230 _CheckInstanceDown(self, instance, "cannot reinstall")
6232 if self.op.os_type is not None:
6234 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6235 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6236 instance_os = self.op.os_type
6238 instance_os = instance.os
6240 nodelist = list(instance.all_nodes)
6242 if self.op.osparams:
6243 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6244 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6245 self.os_inst = i_osdict # the new dict (without defaults)
6249 self.instance = instance
6251 def Exec(self, feedback_fn):
6252 """Reinstall the instance.
6255 inst = self.instance
6257 if self.op.os_type is not None:
6258 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6259 inst.os = self.op.os_type
6260 # Write to configuration
6261 self.cfg.Update(inst, feedback_fn)
6263 _StartInstanceDisks(self, inst, None)
6265 feedback_fn("Running the instance OS create scripts...")
6266 # FIXME: pass debug option from opcode to backend
6267 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6268 self.op.debug_level,
6269 osparams=self.os_inst)
6270 result.Raise("Could not install OS for instance %s on node %s" %
6271 (inst.name, inst.primary_node))
6273 _ShutdownInstanceDisks(self, inst)
6276 class LUInstanceRecreateDisks(LogicalUnit):
6277 """Recreate an instance's missing disks.
6280 HPATH = "instance-recreate-disks"
6281 HTYPE = constants.HTYPE_INSTANCE
6284 def CheckArguments(self):
6285 # normalise the disk list
6286 self.op.disks = sorted(frozenset(self.op.disks))
6288 def ExpandNames(self):
6289 self._ExpandAndLockInstance()
6290 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6292 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6293 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6295 self.needed_locks[locking.LEVEL_NODE] = []
6297 def DeclareLocks(self, level):
6298 if level == locking.LEVEL_NODE:
6299 # if we replace the nodes, we only need to lock the old primary,
6300 # otherwise we need to lock all nodes for disk re-creation
6301 primary_only = bool(self.op.nodes)
6302 self._LockInstancesNodes(primary_only=primary_only)
6304 def BuildHooksEnv(self):
6307 This runs on master, primary and secondary nodes of the instance.
6310 return _BuildInstanceHookEnvByObject(self, self.instance)
6312 def BuildHooksNodes(self):
6313 """Build hooks nodes.
6316 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6319 def CheckPrereq(self):
6320 """Check prerequisites.
6322 This checks that the instance is in the cluster and is not running.
6325 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6326 assert instance is not None, \
6327 "Cannot retrieve locked instance %s" % self.op.instance_name
6329 if len(self.op.nodes) != len(instance.all_nodes):
6330 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6331 " %d replacement nodes were specified" %
6332 (instance.name, len(instance.all_nodes),
6333 len(self.op.nodes)),
6335 assert instance.disk_template != constants.DT_DRBD8 or \
6336 len(self.op.nodes) == 2
6337 assert instance.disk_template != constants.DT_PLAIN or \
6338 len(self.op.nodes) == 1
6339 primary_node = self.op.nodes[0]
6341 primary_node = instance.primary_node
6342 _CheckNodeOnline(self, primary_node)
6344 if instance.disk_template == constants.DT_DISKLESS:
6345 raise errors.OpPrereqError("Instance '%s' has no disks" %
6346 self.op.instance_name, errors.ECODE_INVAL)
6347 # if we replace nodes *and* the old primary is offline, we don't
6349 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6350 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6351 if not (self.op.nodes and old_pnode.offline):
6352 _CheckInstanceDown(self, instance, "cannot recreate disks")
6354 if not self.op.disks:
6355 self.op.disks = range(len(instance.disks))
6357 for idx in self.op.disks:
6358 if idx >= len(instance.disks):
6359 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6361 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6362 raise errors.OpPrereqError("Can't recreate disks partially and"
6363 " change the nodes at the same time",
6365 self.instance = instance
6367 def Exec(self, feedback_fn):
6368 """Recreate the disks.
6371 instance = self.instance
6374 mods = [] # keeps track of needed logical_id changes
6376 for idx, disk in enumerate(instance.disks):
6377 if idx not in self.op.disks: # disk idx has not been passed in
6380 # update secondaries for disks, if needed
6382 if disk.dev_type == constants.LD_DRBD8:
6383 # need to update the nodes and minors
6384 assert len(self.op.nodes) == 2
6385 assert len(disk.logical_id) == 6 # otherwise disk internals
6387 (_, _, old_port, _, _, old_secret) = disk.logical_id
6388 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6389 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6390 new_minors[0], new_minors[1], old_secret)
6391 assert len(disk.logical_id) == len(new_id)
6392 mods.append((idx, new_id))
6394 # now that we have passed all asserts above, we can apply the mods
6395 # in a single run (to avoid partial changes)
6396 for idx, new_id in mods:
6397 instance.disks[idx].logical_id = new_id
6399 # change primary node, if needed
6401 instance.primary_node = self.op.nodes[0]
6402 self.LogWarning("Changing the instance's nodes, you will have to"
6403 " remove any disks left on the older nodes manually")
6406 self.cfg.Update(instance, feedback_fn)
6408 _CreateDisks(self, instance, to_skip=to_skip)
6411 class LUInstanceRename(LogicalUnit):
6412 """Rename an instance.
6415 HPATH = "instance-rename"
6416 HTYPE = constants.HTYPE_INSTANCE
6418 def CheckArguments(self):
6422 if self.op.ip_check and not self.op.name_check:
6423 # TODO: make the ip check more flexible and not depend on the name check
6424 raise errors.OpPrereqError("IP address check requires a name check",
6427 def BuildHooksEnv(self):
6430 This runs on master, primary and secondary nodes of the instance.
6433 env = _BuildInstanceHookEnvByObject(self, self.instance)
6434 env["INSTANCE_NEW_NAME"] = self.op.new_name
6437 def BuildHooksNodes(self):
6438 """Build hooks nodes.
6441 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6444 def CheckPrereq(self):
6445 """Check prerequisites.
6447 This checks that the instance is in the cluster and is not running.
6450 self.op.instance_name = _ExpandInstanceName(self.cfg,
6451 self.op.instance_name)
6452 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6453 assert instance is not None
6454 _CheckNodeOnline(self, instance.primary_node)
6455 _CheckInstanceDown(self, instance, "cannot rename")
6456 self.instance = instance
6458 new_name = self.op.new_name
6459 if self.op.name_check:
6460 hostname = netutils.GetHostname(name=new_name)
6461 if hostname != new_name:
6462 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6464 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6465 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6466 " same as given hostname '%s'") %
6467 (hostname.name, self.op.new_name),
6469 new_name = self.op.new_name = hostname.name
6470 if (self.op.ip_check and
6471 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6472 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6473 (hostname.ip, new_name),
6474 errors.ECODE_NOTUNIQUE)
6476 instance_list = self.cfg.GetInstanceList()
6477 if new_name in instance_list and new_name != instance.name:
6478 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6479 new_name, errors.ECODE_EXISTS)
6481 def Exec(self, feedback_fn):
6482 """Rename the instance.
6485 inst = self.instance
6486 old_name = inst.name
6488 rename_file_storage = False
6489 if (inst.disk_template in constants.DTS_FILEBASED and
6490 self.op.new_name != inst.name):
6491 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6492 rename_file_storage = True
6494 self.cfg.RenameInstance(inst.name, self.op.new_name)
6495 # Change the instance lock. This is definitely safe while we hold the BGL.
6496 # Otherwise the new lock would have to be added in acquired mode.
6498 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6499 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6501 # re-read the instance from the configuration after rename
6502 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6504 if rename_file_storage:
6505 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6506 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6507 old_file_storage_dir,
6508 new_file_storage_dir)
6509 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6510 " (but the instance has been renamed in Ganeti)" %
6511 (inst.primary_node, old_file_storage_dir,
6512 new_file_storage_dir))
6514 _StartInstanceDisks(self, inst, None)
6516 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6517 old_name, self.op.debug_level)
6518 msg = result.fail_msg
6520 msg = ("Could not run OS rename script for instance %s on node %s"
6521 " (but the instance has been renamed in Ganeti): %s" %
6522 (inst.name, inst.primary_node, msg))
6523 self.proc.LogWarning(msg)
6525 _ShutdownInstanceDisks(self, inst)
6530 class LUInstanceRemove(LogicalUnit):
6531 """Remove an instance.
6534 HPATH = "instance-remove"
6535 HTYPE = constants.HTYPE_INSTANCE
6538 def ExpandNames(self):
6539 self._ExpandAndLockInstance()
6540 self.needed_locks[locking.LEVEL_NODE] = []
6541 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6543 def DeclareLocks(self, level):
6544 if level == locking.LEVEL_NODE:
6545 self._LockInstancesNodes()
6547 def BuildHooksEnv(self):
6550 This runs on master, primary and secondary nodes of the instance.
6553 env = _BuildInstanceHookEnvByObject(self, self.instance)
6554 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6557 def BuildHooksNodes(self):
6558 """Build hooks nodes.
6561 nl = [self.cfg.GetMasterNode()]
6562 nl_post = list(self.instance.all_nodes) + nl
6563 return (nl, nl_post)
6565 def CheckPrereq(self):
6566 """Check prerequisites.
6568 This checks that the instance is in the cluster.
6571 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6572 assert self.instance is not None, \
6573 "Cannot retrieve locked instance %s" % self.op.instance_name
6575 def Exec(self, feedback_fn):
6576 """Remove the instance.
6579 instance = self.instance
6580 logging.info("Shutting down instance %s on node %s",
6581 instance.name, instance.primary_node)
6583 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6584 self.op.shutdown_timeout)
6585 msg = result.fail_msg
6587 if self.op.ignore_failures:
6588 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6590 raise errors.OpExecError("Could not shutdown instance %s on"
6592 (instance.name, instance.primary_node, msg))
6594 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6597 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6598 """Utility function to remove an instance.
6601 logging.info("Removing block devices for instance %s", instance.name)
6603 if not _RemoveDisks(lu, instance):
6604 if not ignore_failures:
6605 raise errors.OpExecError("Can't remove instance's disks")
6606 feedback_fn("Warning: can't remove instance's disks")
6608 logging.info("Removing instance %s out of cluster config", instance.name)
6610 lu.cfg.RemoveInstance(instance.name)
6612 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6613 "Instance lock removal conflict"
6615 # Remove lock for the instance
6616 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6619 class LUInstanceQuery(NoHooksLU):
6620 """Logical unit for querying instances.
6623 # pylint: disable=W0142
6626 def CheckArguments(self):
6627 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6628 self.op.output_fields, self.op.use_locking)
6630 def ExpandNames(self):
6631 self.iq.ExpandNames(self)
6633 def DeclareLocks(self, level):
6634 self.iq.DeclareLocks(self, level)
6636 def Exec(self, feedback_fn):
6637 return self.iq.OldStyleQuery(self)
6640 class LUInstanceFailover(LogicalUnit):
6641 """Failover an instance.
6644 HPATH = "instance-failover"
6645 HTYPE = constants.HTYPE_INSTANCE
6648 def CheckArguments(self):
6649 """Check the arguments.
6652 self.iallocator = getattr(self.op, "iallocator", None)
6653 self.target_node = getattr(self.op, "target_node", None)
6655 def ExpandNames(self):
6656 self._ExpandAndLockInstance()
6658 if self.op.target_node is not None:
6659 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6661 self.needed_locks[locking.LEVEL_NODE] = []
6662 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6664 ignore_consistency = self.op.ignore_consistency
6665 shutdown_timeout = self.op.shutdown_timeout
6666 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6669 ignore_consistency=ignore_consistency,
6670 shutdown_timeout=shutdown_timeout)
6671 self.tasklets = [self._migrater]
6673 def DeclareLocks(self, level):
6674 if level == locking.LEVEL_NODE:
6675 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6676 if instance.disk_template in constants.DTS_EXT_MIRROR:
6677 if self.op.target_node is None:
6678 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6680 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6681 self.op.target_node]
6682 del self.recalculate_locks[locking.LEVEL_NODE]
6684 self._LockInstancesNodes()
6686 def BuildHooksEnv(self):
6689 This runs on master, primary and secondary nodes of the instance.
6692 instance = self._migrater.instance
6693 source_node = instance.primary_node
6694 target_node = self.op.target_node
6696 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6697 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6698 "OLD_PRIMARY": source_node,
6699 "NEW_PRIMARY": target_node,
6702 if instance.disk_template in constants.DTS_INT_MIRROR:
6703 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6704 env["NEW_SECONDARY"] = source_node
6706 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6708 env.update(_BuildInstanceHookEnvByObject(self, instance))
6712 def BuildHooksNodes(self):
6713 """Build hooks nodes.
6716 instance = self._migrater.instance
6717 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6718 return (nl, nl + [instance.primary_node])
6721 class LUInstanceMigrate(LogicalUnit):
6722 """Migrate an instance.
6724 This is migration without shutting down, compared to the failover,
6725 which is done with shutdown.
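A rough sketch of how this LU is usually driven through the opcode layer;
the instance name and parameter values are illustrative assumptions:

  op = opcodes.OpInstanceMigrate(instance_name="inst1.example.com",
                                 cleanup=False, allow_failover=True)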
6728 HPATH = "instance-migrate"
6729 HTYPE = constants.HTYPE_INSTANCE
6732 def ExpandNames(self):
6733 self._ExpandAndLockInstance()
6735 if self.op.target_node is not None:
6736 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6738 self.needed_locks[locking.LEVEL_NODE] = []
6739 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6741 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6742 cleanup=self.op.cleanup,
6744 fallback=self.op.allow_failover)
6745 self.tasklets = [self._migrater]
6747 def DeclareLocks(self, level):
6748 if level == locking.LEVEL_NODE:
6749 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6750 if instance.disk_template in constants.DTS_EXT_MIRROR:
6751 if self.op.target_node is None:
6752 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6754 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6755 self.op.target_node]
6756 del self.recalculate_locks[locking.LEVEL_NODE]
6758 self._LockInstancesNodes()
6760 def BuildHooksEnv(self):
6763 This runs on master, primary and secondary nodes of the instance.
6766 instance = self._migrater.instance
6767 source_node = instance.primary_node
6768 target_node = self.op.target_node
6769 env = _BuildInstanceHookEnvByObject(self, instance)
6771 "MIGRATE_LIVE": self._migrater.live,
6772 "MIGRATE_CLEANUP": self.op.cleanup,
6773 "OLD_PRIMARY": source_node,
6774 "NEW_PRIMARY": target_node,
6777 if instance.disk_template in constants.DTS_INT_MIRROR:
6778 env["OLD_SECONDARY"] = target_node
6779 env["NEW_SECONDARY"] = source_node
6781 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6785 def BuildHooksNodes(self):
6786 """Build hooks nodes.
6789 instance = self._migrater.instance
6790 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6791 return (nl, nl + [instance.primary_node])
6794 class LUInstanceMove(LogicalUnit):
6795 """Move an instance by data-copying.
6798 HPATH = "instance-move"
6799 HTYPE = constants.HTYPE_INSTANCE
6802 def ExpandNames(self):
6803 self._ExpandAndLockInstance()
6804 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6805 self.op.target_node = target_node
6806 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6807 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6809 def DeclareLocks(self, level):
6810 if level == locking.LEVEL_NODE:
6811 self._LockInstancesNodes(primary_only=True)
6813 def BuildHooksEnv(self):
6816 This runs on master, primary and secondary nodes of the instance.
6820 "TARGET_NODE": self.op.target_node,
6821 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6823 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6826 def BuildHooksNodes(self):
6827 """Build hooks nodes.
6831 self.cfg.GetMasterNode(),
6832 self.instance.primary_node,
6833 self.op.target_node,
6837 def CheckPrereq(self):
6838 """Check prerequisites.
6840 This checks that the instance is in the cluster.
6843 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6844 assert self.instance is not None, \
6845 "Cannot retrieve locked instance %s" % self.op.instance_name
6847 node = self.cfg.GetNodeInfo(self.op.target_node)
6848 assert node is not None, \
6849 "Cannot retrieve locked node %s" % self.op.target_node
6851 self.target_node = target_node = node.name
6853 if target_node == instance.primary_node:
6854 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6855 (instance.name, target_node),
6858 bep = self.cfg.GetClusterInfo().FillBE(instance)
6860 for idx, dsk in enumerate(instance.disks):
6861 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6862 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6863 " cannot copy" % idx, errors.ECODE_STATE)
6865 _CheckNodeOnline(self, target_node)
6866 _CheckNodeNotDrained(self, target_node)
6867 _CheckNodeVmCapable(self, target_node)
6869 if instance.admin_up:
6870 # check memory requirements on the secondary node
6871 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6872 instance.name, bep[constants.BE_MEMORY],
6873 instance.hypervisor)
6875 self.LogInfo("Not checking memory on the secondary node as"
6876 " instance will not be started")
6878 # check bridge existence
6879 _CheckInstanceBridgesExist(self, instance, node=target_node)
6881 def Exec(self, feedback_fn):
6882 """Move an instance.
6884 The move is done by shutting it down on its present node, copying
6885 the data over (slow) and starting it on the new node.
6888 instance = self.instance
6890 source_node = instance.primary_node
6891 target_node = self.target_node
6893 self.LogInfo("Shutting down instance %s on source node %s",
6894 instance.name, source_node)
6896 result = self.rpc.call_instance_shutdown(source_node, instance,
6897 self.op.shutdown_timeout)
6898 msg = result.fail_msg
6900 if self.op.ignore_consistency:
6901 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6902 " Proceeding anyway. Please make sure node"
6903 " %s is down. Error details: %s",
6904 instance.name, source_node, source_node, msg)
6906 raise errors.OpExecError("Could not shutdown instance %s on"
6908 (instance.name, source_node, msg))
6910 # create the target disks
6912 _CreateDisks(self, instance, target_node=target_node)
6913 except errors.OpExecError:
6914 self.LogWarning("Device creation failed, reverting...")
6916 _RemoveDisks(self, instance, target_node=target_node)
6918 self.cfg.ReleaseDRBDMinors(instance.name)
6921 cluster_name = self.cfg.GetClusterInfo().cluster_name
6924 # activate, get path, copy the data over
6925 for idx, disk in enumerate(instance.disks):
6926 self.LogInfo("Copying data for disk %d", idx)
6927 result = self.rpc.call_blockdev_assemble(target_node, disk,
6928 instance.name, True, idx)
6930 self.LogWarning("Can't assemble newly created disk %d: %s",
6931 idx, result.fail_msg)
6932 errs.append(result.fail_msg)
6934 dev_path = result.payload
6935 result = self.rpc.call_blockdev_export(source_node, disk,
6936 target_node, dev_path,
6939 self.LogWarning("Can't copy data over for disk %d: %s",
6940 idx, result.fail_msg)
6941 errs.append(result.fail_msg)
6945 self.LogWarning("Some disks failed to copy, aborting")
6947 _RemoveDisks(self, instance, target_node=target_node)
6949 self.cfg.ReleaseDRBDMinors(instance.name)
6950 raise errors.OpExecError("Errors during disk copy: %s" %
6953 instance.primary_node = target_node
6954 self.cfg.Update(instance, feedback_fn)
6956 self.LogInfo("Removing the disks on the original node")
6957 _RemoveDisks(self, instance, target_node=source_node)
6959 # Only start the instance if it's marked as up
6960 if instance.admin_up:
6961 self.LogInfo("Starting instance %s on node %s",
6962 instance.name, target_node)
6964 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6965 ignore_secondaries=True)
6967 _ShutdownInstanceDisks(self, instance)
6968 raise errors.OpExecError("Can't activate the instance's disks")
6970 result = self.rpc.call_instance_start(target_node, instance,
6972 msg = result.fail_msg
6974 _ShutdownInstanceDisks(self, instance)
6975 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6976 (instance.name, target_node, msg))
6979 class LUNodeMigrate(LogicalUnit):
6980 """Migrate all instances from a node.
6983 HPATH = "node-migrate"
6984 HTYPE = constants.HTYPE_NODE
6987 def CheckArguments(self):
6990 def ExpandNames(self):
6991 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6993 self.share_locks = _ShareAll()
6994 self.needed_locks = {
6995 locking.LEVEL_NODE: [self.op.node_name],
6998 def BuildHooksEnv(self):
7001 This runs on the master, the primary and all the secondaries.
7005 "NODE_NAME": self.op.node_name,
7008 def BuildHooksNodes(self):
7009 """Build hooks nodes.
7012 nl = [self.cfg.GetMasterNode()]
7015 def CheckPrereq(self):
7018 def Exec(self, feedback_fn):
7019 # Prepare jobs for migration instances
7021 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7024 iallocator=self.op.iallocator,
7025 target_node=self.op.target_node)]
7026 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7029 # TODO: Run iallocator in this opcode and pass correct placement options to
7030 # OpInstanceMigrate. Since other jobs can modify the cluster between
7031 # running the iallocator and the actual migration, a good consistency model
7032 # will have to be found.
7034 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7035 frozenset([self.op.node_name]))
7037 return ResultWithJobs(jobs)
7040 class TLMigrateInstance(Tasklet):
7041 """Tasklet class for instance migration.
7044 @ivar live: whether the migration will be done live or non-live;
7045 this variable is initialized only after CheckPrereq has run
7046 @type cleanup: boolean
7047 @ivar cleanup: Whether we are cleaning up after a failed migration
7048 @type iallocator: string
7049 @ivar iallocator: The iallocator used to determine target_node
7050 @type target_node: string
7051 @ivar target_node: If given, the target_node to reallocate the instance to
7052 @type failover: boolean
7053 @ivar failover: Whether operation results in failover or migration
7054 @type fallback: boolean
7055 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
7057 @type ignore_consistency: boolean
7058 @ivar ignore_consistency: Whether we should ignore consistency between the source and the target node
7060 @type shutdown_timeout: int
7061 @ivar shutdown_timeout: In case of failover, the timeout to use for the instance shutdown
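A minimal construction sketch, close to how LUInstanceFailover wires up the
tasklet (the keyword values are illustrative assumptions):

  self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                     failover=True,
                                     ignore_consistency=False,
                                     shutdown_timeout=120)
  self.tasklets = [self._migrater]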
7064 def __init__(self, lu, instance_name, cleanup=False,
7065 failover=False, fallback=False,
7066 ignore_consistency=False,
7067 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7068 """Initializes this class.
7071 Tasklet.__init__(self, lu)
7074 self.instance_name = instance_name
7075 self.cleanup = cleanup
7076 self.live = False # will be overridden later
7077 self.failover = failover
7078 self.fallback = fallback
7079 self.ignore_consistency = ignore_consistency
7080 self.shutdown_timeout = shutdown_timeout
7082 def CheckPrereq(self):
7083 """Check prerequisites.
7085 This checks that the instance is in the cluster.
7088 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7089 instance = self.cfg.GetInstanceInfo(instance_name)
7090 assert instance is not None
7091 self.instance = instance
7093 if (not self.cleanup and not instance.admin_up and not self.failover and
7095 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7097 self.failover = True
7099 if instance.disk_template not in constants.DTS_MIRRORED:
7104 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7105 " %s" % (instance.disk_template, text),
7108 if instance.disk_template in constants.DTS_EXT_MIRROR:
7109 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7111 if self.lu.op.iallocator:
7112 self._RunAllocator()
7114 # We set self.target_node as it is required by
7116 self.target_node = self.lu.op.target_node
7118 # self.target_node is already populated, either directly or by the
7120 target_node = self.target_node
7121 if self.target_node == instance.primary_node:
7122 raise errors.OpPrereqError("Cannot migrate instance %s"
7123 " to its primary (%s)" %
7124 (instance.name, instance.primary_node))
7126 if len(self.lu.tasklets) == 1:
7127 # It is safe to release locks only when we're the only tasklet
7129 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7130 keep=[instance.primary_node, self.target_node])
7133 secondary_nodes = instance.secondary_nodes
7134 if not secondary_nodes:
7135 raise errors.ConfigurationError("No secondary node but using"
7136 " %s disk template" %
7137 instance.disk_template)
7138 target_node = secondary_nodes[0]
7139 if self.lu.op.iallocator or (self.lu.op.target_node and
7140 self.lu.op.target_node != target_node):
7142 text = "failed over"
7145 raise errors.OpPrereqError("Instances with disk template %s cannot"
7146 " be %s to arbitrary nodes"
7147 " (neither an iallocator nor a target"
7148 " node can be passed)" %
7149 (instance.disk_template, text),
7152 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7154 # check memory requirements on the secondary node
7155 if not self.failover or instance.admin_up:
7156 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7157 instance.name, i_be[constants.BE_MEMORY],
7158 instance.hypervisor)
7160 self.lu.LogInfo("Not checking memory on the secondary node as"
7161 " instance will not be started")
7163 # check bridge existence
7164 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7166 if not self.cleanup:
7167 _CheckNodeNotDrained(self.lu, target_node)
7168 if not self.failover:
7169 result = self.rpc.call_instance_migratable(instance.primary_node,
7171 if result.fail_msg and self.fallback:
7172 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7174 self.failover = True
7176 result.Raise("Can't migrate, please use failover",
7177 prereq=True, ecode=errors.ECODE_STATE)
7179 assert not (self.failover and self.cleanup)
7181 if not self.failover:
7182 if self.lu.op.live is not None and self.lu.op.mode is not None:
7183 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7184 " parameters are accepted",
7186 if self.lu.op.live is not None:
7188 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7190 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7191 # reset the 'live' parameter to None so that repeated
7192 # invocations of CheckPrereq do not raise an exception
7193 self.lu.op.live = None
7194 elif self.lu.op.mode is None:
7195 # read the default value from the hypervisor
7196 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7198 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7200 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7202 # Failover is never live
7205 def _RunAllocator(self):
7206 """Run the allocator based on input opcode.
7209 ial = IAllocator(self.cfg, self.rpc,
7210 mode=constants.IALLOCATOR_MODE_RELOC,
7211 name=self.instance_name,
7212 # TODO See why hail breaks with a single node below
7213 relocate_from=[self.instance.primary_node,
7214 self.instance.primary_node],
7217 ial.Run(self.lu.op.iallocator)
7220 raise errors.OpPrereqError("Can't compute nodes using"
7221 " iallocator '%s': %s" %
7222 (self.lu.op.iallocator, ial.info),
7224 if len(ial.result) != ial.required_nodes:
7225 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7226 " of nodes (%s), required %s" %
7227 (self.lu.op.iallocator, len(ial.result),
7228 ial.required_nodes), errors.ECODE_FAULT)
7229 self.target_node = ial.result[0]
7230 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7231 self.instance_name, self.lu.op.iallocator,
7232 utils.CommaJoin(ial.result))
7234 def _WaitUntilSync(self):
7235 """Poll with custom rpc for disk sync.
7237 This uses our own step-based rpc call.
7240 self.feedback_fn("* wait until resync is done")
7244 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7246 self.instance.disks)
7248 for node, nres in result.items():
7249 nres.Raise("Cannot resync disks on node %s" % node)
7250 node_done, node_percent = nres.payload
7251 all_done = all_done and node_done
7252 if node_percent is not None:
7253 min_percent = min(min_percent, node_percent)
7255 if min_percent < 100:
7256 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7259 def _EnsureSecondary(self, node):
7260 """Demote a node to secondary.
7263 self.feedback_fn("* switching node %s to secondary mode" % node)
7265 for dev in self.instance.disks:
7266 self.cfg.SetDiskID(dev, node)
7268 result = self.rpc.call_blockdev_close(node, self.instance.name,
7269 self.instance.disks)
7270 result.Raise("Cannot change disk to secondary on node %s" % node)
7272 def _GoStandalone(self):
7273 """Disconnect from the network.
7276 self.feedback_fn("* changing into standalone mode")
7277 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7278 self.instance.disks)
7279 for node, nres in result.items():
7280 nres.Raise("Cannot disconnect disks node %s" % node)
7282 def _GoReconnect(self, multimaster):
7283 """Reconnect to the network.
7289 msg = "single-master"
7290 self.feedback_fn("* changing disks into %s mode" % msg)
7291 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7292 self.instance.disks,
7293 self.instance.name, multimaster)
7294 for node, nres in result.items():
7295 nres.Raise("Cannot change disks config on node %s" % node)
7297 def _ExecCleanup(self):
7298 """Try to cleanup after a failed migration.
7300 The cleanup is done by:
7301 - check that the instance is running only on one node
7302 (and update the config if needed)
7303 - change disks on its secondary node to secondary
7304 - wait until disks are fully synchronized
7305 - disconnect from the network
7306 - change disks into single-master mode
7307 - wait again until disks are fully synchronized
7310 instance = self.instance
7311 target_node = self.target_node
7312 source_node = self.source_node
7314 # check running on only one node
7315 self.feedback_fn("* checking where the instance actually runs"
7316 " (if this hangs, the hypervisor might be in a bad state)")
7318 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7319 for node, result in ins_l.items():
7320 result.Raise("Can't contact node %s" % node)
7322 runningon_source = instance.name in ins_l[source_node].payload
7323 runningon_target = instance.name in ins_l[target_node].payload
7325 if runningon_source and runningon_target:
7326 raise errors.OpExecError("Instance seems to be running on two nodes,"
7327 " or the hypervisor is confused; you will have"
7328 " to ensure manually that it runs only on one"
7329 " and restart this operation")
7331 if not (runningon_source or runningon_target):
7332 raise errors.OpExecError("Instance does not seem to be running at all;"
7333 " in this case it's safer to repair by"
7334 " running 'gnt-instance stop' to ensure disk"
7335 " shutdown, and then restarting it")
7337 if runningon_target:
7338 # the migration has actually succeeded, we need to update the config
7339 self.feedback_fn("* instance running on secondary node (%s),"
7340 " updating config" % target_node)
7341 instance.primary_node = target_node
7342 self.cfg.Update(instance, self.feedback_fn)
7343 demoted_node = source_node
7345 self.feedback_fn("* instance confirmed to be running on its"
7346 " primary node (%s)" % source_node)
7347 demoted_node = target_node
7349 if instance.disk_template in constants.DTS_INT_MIRROR:
7350 self._EnsureSecondary(demoted_node)
7352 self._WaitUntilSync()
7353 except errors.OpExecError:
7354 # we ignore errors here, since if the device is standalone, it
7355 # won't be able to sync
7357 self._GoStandalone()
7358 self._GoReconnect(False)
7359 self._WaitUntilSync()
7361 self.feedback_fn("* done")
7363 def _RevertDiskStatus(self):
7364 """Try to revert the disk status after a failed migration.
7367 target_node = self.target_node
7368 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7372 self._EnsureSecondary(target_node)
7373 self._GoStandalone()
7374 self._GoReconnect(False)
7375 self._WaitUntilSync()
7376 except errors.OpExecError, err:
7377 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7378 " please try to recover the instance manually;"
7379 " error '%s'" % str(err))
7381 def _AbortMigration(self):
7382 """Call the hypervisor code to abort a started migration.
7385 instance = self.instance
7386 target_node = self.target_node
7387 migration_info = self.migration_info
7389 abort_result = self.rpc.call_finalize_migration(target_node,
7393 abort_msg = abort_result.fail_msg
7395 logging.error("Aborting migration failed on target node %s: %s",
7396 target_node, abort_msg)
7397 # Don't raise an exception here, as we still have to try to revert the
7398 # disk status, even if this step failed.
7400 def _ExecMigration(self):
7401 """Migrate an instance.
7403 The migrate is done by:
7404 - change the disks into dual-master mode
7405 - wait until disks are fully synchronized again
7406 - migrate the instance
7407 - change disks on the new secondary node (the old primary) to secondary
7408 - wait until disks are fully synchronized
7409 - change disks into single-master mode
7412 instance = self.instance
7413 target_node = self.target_node
7414 source_node = self.source_node
7416 # Check for hypervisor version mismatch and warn the user.
7417 nodeinfo = self.rpc.call_node_info([source_node, target_node],
7418 None, self.instance.hypervisor)
7419 src_info = nodeinfo[source_node]
7420 dst_info = nodeinfo[target_node]
7422 if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7423 (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7424 src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7425 dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7426 if src_version != dst_version:
7427 self.feedback_fn("* warning: hypervisor version mismatch between"
7428 " source (%s) and target (%s) node" %
7429 (src_version, dst_version))
7431 self.feedback_fn("* checking disk consistency between source and target")
7432 for dev in instance.disks:
7433 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7434 raise errors.OpExecError("Disk %s is degraded or not fully"
7435 " synchronized on target node,"
7436 " aborting migration" % dev.iv_name)
7438 # First get the migration information from the remote node
7439 result = self.rpc.call_migration_info(source_node, instance)
7440 msg = result.fail_msg
7442 log_err = ("Failed fetching source migration information from %s: %s" %
7444 logging.error(log_err)
7445 raise errors.OpExecError(log_err)
7447 self.migration_info = migration_info = result.payload
7449 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7450 # Then switch the disks to master/master mode
7451 self._EnsureSecondary(target_node)
7452 self._GoStandalone()
7453 self._GoReconnect(True)
7454 self._WaitUntilSync()
7456 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7457 result = self.rpc.call_accept_instance(target_node,
7460 self.nodes_ip[target_node])
7462 msg = result.fail_msg
7464 logging.error("Instance pre-migration failed, trying to revert"
7465 " disk status: %s", msg)
7466 self.feedback_fn("Pre-migration failed, aborting")
7467 self._AbortMigration()
7468 self._RevertDiskStatus()
7469 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7470 (instance.name, msg))
7472 self.feedback_fn("* migrating instance to %s" % target_node)
7473 result = self.rpc.call_instance_migrate(source_node, instance,
7474 self.nodes_ip[target_node],
7476 msg = result.fail_msg
7478 logging.error("Instance migration failed, trying to revert"
7479 " disk status: %s", msg)
7480 self.feedback_fn("Migration failed, aborting")
7481 self._AbortMigration()
7482 self._RevertDiskStatus()
7483 raise errors.OpExecError("Could not migrate instance %s: %s" %
7484 (instance.name, msg))
7486 instance.primary_node = target_node
7487 # distribute new instance config to the other nodes
7488 self.cfg.Update(instance, self.feedback_fn)
7490 result = self.rpc.call_finalize_migration(target_node,
7494 msg = result.fail_msg
7496 logging.error("Instance migration succeeded, but finalization failed:"
7498 raise errors.OpExecError("Could not finalize instance migration: %s" %
7501 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7502 self._EnsureSecondary(source_node)
7503 self._WaitUntilSync()
7504 self._GoStandalone()
7505 self._GoReconnect(False)
7506 self._WaitUntilSync()
7508 self.feedback_fn("* done")
7510 def _ExecFailover(self):
7511 """Failover an instance.
7513 The failover is done by shutting it down on its present node and
7514 starting it on the secondary.
7517 instance = self.instance
7518 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7520 source_node = instance.primary_node
7521 target_node = self.target_node
7523 if instance.admin_up:
7524 self.feedback_fn("* checking disk consistency between source and target")
7525 for dev in instance.disks:
7526 # for drbd, these are drbd over lvm
7527 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7528 if primary_node.offline:
7529 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7531 (primary_node.name, dev.iv_name, target_node))
7532 elif not self.ignore_consistency:
7533 raise errors.OpExecError("Disk %s is degraded on target node,"
7534 " aborting failover" % dev.iv_name)
7536 self.feedback_fn("* not checking disk consistency as instance is not"
7539 self.feedback_fn("* shutting down instance on source node")
7540 logging.info("Shutting down instance %s on node %s",
7541 instance.name, source_node)
7543 result = self.rpc.call_instance_shutdown(source_node, instance,
7544 self.shutdown_timeout)
7545 msg = result.fail_msg
7547 if self.ignore_consistency or primary_node.offline:
7548 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7549 " proceeding anyway; please make sure node"
7550 " %s is down; error details: %s",
7551 instance.name, source_node, source_node, msg)
7553 raise errors.OpExecError("Could not shutdown instance %s on"
7555 (instance.name, source_node, msg))
7557 self.feedback_fn("* deactivating the instance's disks on source node")
7558 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7559 raise errors.OpExecError("Can't shut down the instance's disks")
7561 instance.primary_node = target_node
7562 # distribute new instance config to the other nodes
7563 self.cfg.Update(instance, self.feedback_fn)
7565 # Only start the instance if it's marked as up
7566 if instance.admin_up:
7567 self.feedback_fn("* activating the instance's disks on target node %s" %
7569 logging.info("Starting instance %s on node %s",
7570 instance.name, target_node)
7572 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7573 ignore_secondaries=True)
7575 _ShutdownInstanceDisks(self.lu, instance)
7576 raise errors.OpExecError("Can't activate the instance's disks")
7578 self.feedback_fn("* starting the instance on the target node %s" %
7580 result = self.rpc.call_instance_start(target_node, instance, None, None,
7582 msg = result.fail_msg
7584 _ShutdownInstanceDisks(self.lu, instance)
7585 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7586 (instance.name, target_node, msg))
7588 def Exec(self, feedback_fn):
7589 """Perform the migration.
7592 self.feedback_fn = feedback_fn
7593 self.source_node = self.instance.primary_node
7595 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7596 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7597 self.target_node = self.instance.secondary_nodes[0]
7598 # Otherwise self.target_node has been populated either
7599 # directly, or through an iallocator.
7601 self.all_nodes = [self.source_node, self.target_node]
7602 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7603 in self.cfg.GetMultiNodeInfo(self.all_nodes))
7606 feedback_fn("Failover instance %s" % self.instance.name)
7607 self._ExecFailover()
7609 feedback_fn("Migrating instance %s" % self.instance.name)
7612 return self._ExecCleanup()
7614 return self._ExecMigration()
7617 def _CreateBlockDev(lu, node, instance, device, force_create,
7619 """Create a tree of block devices on a given node.
7621 If this device type has to be created on secondaries, create it and all its children.
7624 If not, just recurse to children keeping the same 'force' value.
7626 @param lu: the lu on whose behalf we execute
7627 @param node: the node on which to create the device
7628 @type instance: L{objects.Instance}
7629 @param instance: the instance which owns the device
7630 @type device: L{objects.Disk}
7631 @param device: the device to create
7632 @type force_create: boolean
7633 @param force_create: whether to force creation of this device; this
7634 will be changed to True whenever we find a device for which
7635 CreateOnSecondary() is true
7636 @param info: the extra 'metadata' we should attach to the device
7637 (this will be represented as a LVM tag)
7638 @type force_open: boolean
7639 @param force_open: this parameter will be passed to the
7640 L{backend.BlockdevCreate} function where it specifies
7641 whether we run on primary or not, and it affects both
7642 the child assembly and the device's own Open() execution
7645 if device.CreateOnSecondary():
7649 for child in device.children:
7650 _CreateBlockDev(lu, node, instance, child, force_create,
7653 if not force_create:
7656 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
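# Illustrative sketch (added comment, not original code): how force_create
# propagates while recursing. Once a device reports CreateOnSecondary()
# (a DRBD8 device, for instance), the flag is flipped to True and all of
# its children are created as well on the node we were asked to handle:
#
#   # hypothetical top-level call with force_create=False
#   _CreateBlockDev(lu, node, instance, drbd_disk, False, info, force_open)
#   # drbd_disk.CreateOnSecondary() is true, so the recursive calls for its
#   # data/meta LV children run with force_create=True and each of them
#   # reaches _CreateSingleBlockDev() below.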
7659 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7660 """Create a single block device on a given node.
7662 This will not recurse over children of the device, so they must be created in advance.
7665 @param lu: the lu on whose behalf we execute
7666 @param node: the node on which to create the device
7667 @type instance: L{objects.Instance}
7668 @param instance: the instance which owns the device
7669 @type device: L{objects.Disk}
7670 @param device: the device to create
7671 @param info: the extra 'metadata' we should attach to the device
7672 (this will be represented as a LVM tag)
7673 @type force_open: boolean
7674 @param force_open: this parameter will be passed to the
7675 L{backend.BlockdevCreate} function where it specifies
7676 whether we run on primary or not, and it affects both
7677 the child assembly and the device's own Open() execution
7680 lu.cfg.SetDiskID(device, node)
7681 result = lu.rpc.call_blockdev_create(node, device, device.size,
7682 instance.name, force_open, info)
7683 result.Raise("Can't create block device %s on"
7684 " node %s for instance %s" % (device, node, instance.name))
7685 if device.physical_id is None:
7686 device.physical_id = result.payload
7689 def _GenerateUniqueNames(lu, exts):
7690 """Generate suitable LV names.
7692 This will generate one unique logical volume name for each of the given extensions.
7697 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7698 results.append("%s%s" % (new_id, val))
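# Example of the generated names (added comment; the unique IDs shown are
# invented):
#   _GenerateUniqueNames(lu, [".disk0", ".disk1"])
#   -> ["4f2b1c3a-....disk0", "9d7e5a10-....disk1"]
# i.e. one configuration-unique ID per requested extension.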
7702 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7703 iv_name, p_minor, s_minor):
7704 """Generate a drbd8 device complete with its children.
7707 assert len(vgnames) == len(names) == 2
7708 port = lu.cfg.AllocatePort()
7709 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7710 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7711 logical_id=(vgnames[0], names[0]))
7712 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7713 logical_id=(vgnames[1], names[1]))
7714 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7715 logical_id=(primary, secondary, port,
7718 children=[dev_data, dev_meta],
7723 def _GenerateDiskTemplate(lu, template_name,
7724 instance_name, primary_node,
7725 secondary_nodes, disk_info,
7726 file_storage_dir, file_driver,
7727 base_index, feedback_fn):
7728 """Generate the entire disk layout for a given template type.
7731 # TODO: compute space requirements
7733 vgname = lu.cfg.GetVGName()
7734 disk_count = len(disk_info)
7736 if template_name == constants.DT_DISKLESS:
7738 elif template_name == constants.DT_PLAIN:
7739 if len(secondary_nodes) != 0:
7740 raise errors.ProgrammerError("Wrong template configuration")
7742 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7743 for i in range(disk_count)])
7744 for idx, disk in enumerate(disk_info):
7745 disk_index = idx + base_index
7746 vg = disk.get(constants.IDISK_VG, vgname)
7747 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7748 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7749 size=disk[constants.IDISK_SIZE],
7750 logical_id=(vg, names[idx]),
7751 iv_name="disk/%d" % disk_index,
7752 mode=disk[constants.IDISK_MODE])
7753 disks.append(disk_dev)
7754 elif template_name == constants.DT_DRBD8:
7755 if len(secondary_nodes) != 1:
7756 raise errors.ProgrammerError("Wrong template configuration")
7757 remote_node = secondary_nodes[0]
7758 minors = lu.cfg.AllocateDRBDMinor(
7759 [primary_node, remote_node] * len(disk_info), instance_name)
7762 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7763 for i in range(disk_count)]):
7764 names.append(lv_prefix + "_data")
7765 names.append(lv_prefix + "_meta")
7766 for idx, disk in enumerate(disk_info):
7767 disk_index = idx + base_index
7768 data_vg = disk.get(constants.IDISK_VG, vgname)
7769 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7770 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7771 disk[constants.IDISK_SIZE],
7773 names[idx * 2:idx * 2 + 2],
7774 "disk/%d" % disk_index,
7775 minors[idx * 2], minors[idx * 2 + 1])
7776 disk_dev.mode = disk[constants.IDISK_MODE]
7777 disks.append(disk_dev)
7778 elif template_name == constants.DT_FILE:
7779 if len(secondary_nodes) != 0:
7780 raise errors.ProgrammerError("Wrong template configuration")
7782 opcodes.RequireFileStorage()
7784 for idx, disk in enumerate(disk_info):
7785 disk_index = idx + base_index
7786 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7787 size=disk[constants.IDISK_SIZE],
7788 iv_name="disk/%d" % disk_index,
7789 logical_id=(file_driver,
7790 "%s/disk%d" % (file_storage_dir,
7792 mode=disk[constants.IDISK_MODE])
7793 disks.append(disk_dev)
7794 elif template_name == constants.DT_SHARED_FILE:
7795 if len(secondary_nodes) != 0:
7796 raise errors.ProgrammerError("Wrong template configuration")
7798 opcodes.RequireSharedFileStorage()
7800 for idx, disk in enumerate(disk_info):
7801 disk_index = idx + base_index
7802 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7803 size=disk[constants.IDISK_SIZE],
7804 iv_name="disk/%d" % disk_index,
7805 logical_id=(file_driver,
7806 "%s/disk%d" % (file_storage_dir,
7808 mode=disk[constants.IDISK_MODE])
7809 disks.append(disk_dev)
7810 elif template_name == constants.DT_BLOCK:
7811 if len(secondary_nodes) != 0:
7812 raise errors.ProgrammerError("Wrong template configuration")
7814 for idx, disk in enumerate(disk_info):
7815 disk_index = idx + base_index
7816 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7817 size=disk[constants.IDISK_SIZE],
7818 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7819 disk[constants.IDISK_ADOPT]),
7820 iv_name="disk/%d" % disk_index,
7821 mode=disk[constants.IDISK_MODE])
7822 disks.append(disk_dev)
7825 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7829 def _GetInstanceInfoText(instance):
7830 """Compute the text that should be added to the disk's metadata.
7833 return "originstname+%s" % instance.name
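# For example (added comment): an instance named "inst1.example.com" gets
# "originstname+inst1.example.com" attached to its disks (stored as an LVM
# tag for LV-backed disk templates).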
7836 def _CalcEta(time_taken, written, total_size):
7837 """Calculates the ETA based on size written and total size.
7839 @param time_taken: The time taken so far
7840 @param written: amount written so far
7841 @param total_size: The total size of data to be written
7842 @return: The remaining time in seconds
7845 avg_time = time_taken / float(written)
7846 return (total_size - written) * avg_time
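# Worked example (added comment): if 256 out of 1024 units were written in
# 30 seconds, avg_time is 30/256 seconds per unit and the ETA is
# (1024 - 256) * 30 / 256 = 90 seconds.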
7849 def _WipeDisks(lu, instance):
7850 """Wipes instance disks.
7852 @type lu: L{LogicalUnit}
7853 @param lu: the logical unit on whose behalf we execute
7854 @type instance: L{objects.Instance}
7855 @param instance: the instance whose disks we should wipe
7856 @return: the success of the wipe
7859 node = instance.primary_node
7861 for device in instance.disks:
7862 lu.cfg.SetDiskID(device, node)
7864 logging.info("Pause sync of instance %s disks", instance.name)
7865 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7867 for idx, success in enumerate(result.payload):
7869 logging.warn("pause-sync of instance %s for disk %d failed",
7873 for idx, device in enumerate(instance.disks):
7874 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7875 # MAX_WIPE_CHUNK at max
7876 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7877 constants.MIN_WIPE_CHUNK_PERCENT)
7878 # we _must_ make this an int, otherwise rounding errors will occur
7880 wipe_chunk_size = int(wipe_chunk_size)
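# Numeric example (added comment; the constant values below are assumed):
# with MAX_WIPE_CHUNK = 1024 MiB and MIN_WIPE_CHUNK_PERCENT = 10, a
# 20 GiB disk gives min(1024, 20480 / 100.0 * 10) = 1024 MiB chunks, while
# a 5 GiB disk gives min(1024, 512) = 512 MiB chunks.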
7882 lu.LogInfo("* Wiping disk %d", idx)
7883 logging.info("Wiping disk %d for instance %s, node %s using"
7884 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7889 start_time = time.time()
7891 while offset < size:
7892 wipe_size = min(wipe_chunk_size, size - offset)
7893 logging.debug("Wiping disk %d, offset %s, chunk %s",
7894 idx, offset, wipe_size)
7895 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7896 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7897 (idx, offset, wipe_size))
7900 if now - last_output >= 60:
7901 eta = _CalcEta(now - start_time, offset, size)
7902 lu.LogInfo(" - done: %.1f%% ETA: %s" %
7903 (offset / float(size) * 100, utils.FormatSeconds(eta)))
7906 logging.info("Resume sync of instance %s disks", instance.name)
7908 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7910 for idx, success in enumerate(result.payload):
7912 lu.LogWarning("Resume sync of disk %d failed, please have a"
7913 " look at the status and troubleshoot the issue", idx)
7914 logging.warn("resume-sync of instance %s for disk %d failed",
7918 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7919 """Create all disks for an instance.
7921 This abstracts away some work from AddInstance.
7923 @type lu: L{LogicalUnit}
7924 @param lu: the logical unit on whose behalf we execute
7925 @type instance: L{objects.Instance}
7926 @param instance: the instance whose disks we should create
7928 @param to_skip: list of indices to skip
7929 @type target_node: string
7930 @param target_node: if passed, overrides the target node for creation
7932 @return: the success of the creation
7935 info = _GetInstanceInfoText(instance)
7936 if target_node is None:
7937 pnode = instance.primary_node
7938 all_nodes = instance.all_nodes
7943 if instance.disk_template in constants.DTS_FILEBASED:
7944 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7945 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7947 result.Raise("Failed to create directory '%s' on"
7948 " node %s" % (file_storage_dir, pnode))
7950 # Note: this needs to be kept in sync with adding of disks in
7951 # LUInstanceSetParams
7952 for idx, device in enumerate(instance.disks):
7953 if to_skip and idx in to_skip:
7955 logging.info("Creating volume %s for instance %s",
7956 device.iv_name, instance.name)
7958 for node in all_nodes:
7959 f_create = node == pnode
7960 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7963 def _RemoveDisks(lu, instance, target_node=None):
7964 """Remove all disks for an instance.
7966 This abstracts away some work from `AddInstance()` and
7967 `RemoveInstance()`. Note that in case some of the devices couldn't
7968 be removed, the removal will continue with the other ones (compare
7969 with `_CreateDisks()`).
7971 @type lu: L{LogicalUnit}
7972 @param lu: the logical unit on whose behalf we execute
7973 @type instance: L{objects.Instance}
7974 @param instance: the instance whose disks we should remove
7975 @type target_node: string
7976 @param target_node: used to override the node on which to remove the disks
7978 @return: the success of the removal
7981 logging.info("Removing block devices for instance %s", instance.name)
7984 for device in instance.disks:
7986 edata = [(target_node, device)]
7988 edata = device.ComputeNodeTree(instance.primary_node)
7989 for node, disk in edata:
7990 lu.cfg.SetDiskID(disk, node)
7991 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7993 lu.LogWarning("Could not remove block device %s on node %s,"
7994 " continuing anyway: %s", device.iv_name, node, msg)
7997 if instance.disk_template == constants.DT_FILE:
7998 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8002 tgt = instance.primary_node
8003 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8005 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8006 file_storage_dir, instance.primary_node, result.fail_msg)
8012 def _ComputeDiskSizePerVG(disk_template, disks):
8013 """Compute disk size requirements in the volume group
8016 def _compute(disks, payload):
8017 """Sum disk sizes per volume group, adding C{payload} MiB per disk.
8022 vgs[disk[constants.IDISK_VG]] = \
8023 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8027 # Required free disk space as a function of disk and swap space
8029 constants.DT_DISKLESS: {},
8030 constants.DT_PLAIN: _compute(disks, 0),
8031 # 128 MB are added for drbd metadata for each disk
8032 constants.DT_DRBD8: _compute(disks, 128),
8033 constants.DT_FILE: {},
8034 constants.DT_SHARED_FILE: {},
8037 if disk_template not in req_size_dict:
8038 raise errors.ProgrammerError("Disk template '%s' size requirement"
8039 " is unknown" % disk_template)
8041 return req_size_dict[disk_template]
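# Example (added comment, with illustrative VG names): two 10240 MiB disks,
# one in VG "xenvg" and one in VG "fastvg", yield
# {"xenvg": 10240, "fastvg": 10240} for DT_PLAIN and
# {"xenvg": 10368, "fastvg": 10368} for DT_DRBD8, because of the 128 MiB of
# metadata added per disk.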
8044 def _ComputeDiskSize(disk_template, disks):
8045 """Compute disk size requirements in the volume group
8048 # Required free disk space as a function of disk and swap space
8050 constants.DT_DISKLESS: None,
8051 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8052 # 128 MB are added for drbd metadata for each disk
8053 constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
8054 constants.DT_FILE: None,
8055 constants.DT_SHARED_FILE: 0,
8056 constants.DT_BLOCK: 0,
8059 if disk_template not in req_size_dict:
8060 raise errors.ProgrammerError("Disk template '%s' size requirement"
8061 " is unknown" % disk_template)
8063 return req_size_dict[disk_template]
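# Example (added comment): for disks of 1024 and 2048 MiB, DT_PLAIN needs
# 1024 + 2048 = 3072 MiB in the volume group and DT_DRBD8 needs
# (1024 + 128) + (2048 + 128) = 3328 MiB, while the file-based, block and
# diskless templates report no LVM space requirement.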
8066 def _FilterVmNodes(lu, nodenames):
8067 """Filters out non-vm_capable nodes from a list.
8069 @type lu: L{LogicalUnit}
8070 @param lu: the logical unit for which we check
8071 @type nodenames: list
8072 @param nodenames: the list of nodes on which we should check
8074 @return: the list of vm-capable nodes
8077 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8078 return [name for name in nodenames if name not in vm_nodes]
8081 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8082 """Hypervisor parameter validation.
8084 This function abstracts the hypervisor parameter validation to be
8085 used in both instance create and instance modify.
8087 @type lu: L{LogicalUnit}
8088 @param lu: the logical unit for which we check
8089 @type nodenames: list
8090 @param nodenames: the list of nodes on which we should check
8091 @type hvname: string
8092 @param hvname: the name of the hypervisor we should use
8093 @type hvparams: dict
8094 @param hvparams: the parameters which we need to check
8095 @raise errors.OpPrereqError: if the parameters are not valid
8098 nodenames = _FilterVmNodes(lu, nodenames)
8099 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8102 for node in nodenames:
8106 info.Raise("Hypervisor parameter validation failed on node %s" % node)
8109 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8110 """OS parameters validation.
8112 @type lu: L{LogicalUnit}
8113 @param lu: the logical unit for which we check
8114 @type required: boolean
8115 @param required: whether the validation should fail if the OS is not found
8117 @type nodenames: list
8118 @param nodenames: the list of nodes on which we should check
8119 @type osname: string
8120 @param osname: the name of the OS we should use
8121 @type osparams: dict
8122 @param osparams: the parameters which we need to check
8123 @raise errors.OpPrereqError: if the parameters are not valid
8126 nodenames = _FilterVmNodes(lu, nodenames)
8127 result = lu.rpc.call_os_validate(required, nodenames, osname,
8128 [constants.OS_VALIDATE_PARAMETERS],
8130 for node, nres in result.items():
8131 # we don't check for offline cases since this should be run only
8132 # against the master node and/or an instance's nodes
8133 nres.Raise("OS Parameters validation failed on node %s" % node)
8134 if not nres.payload:
8135 lu.LogInfo("OS %s not found on node %s, validation skipped",
8139 class LUInstanceCreate(LogicalUnit):
8140 """Create an instance.
8143 HPATH = "instance-add"
8144 HTYPE = constants.HTYPE_INSTANCE
8147 def CheckArguments(self):
8151 # do not require name_check to ease forward/backward compatibility
8153 if self.op.no_install and self.op.start:
8154 self.LogInfo("No-installation mode selected, disabling startup")
8155 self.op.start = False
8156 # validate/normalize the instance name
8157 self.op.instance_name = \
8158 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8160 if self.op.ip_check and not self.op.name_check:
8161 # TODO: make the ip check more flexible and not depend on the name check
8162 raise errors.OpPrereqError("Cannot do IP address check without a name"
8163 " check", errors.ECODE_INVAL)
8165 # check nics' parameter names
8166 for nic in self.op.nics:
8167 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8169 # check disks. parameter names and consistent adopt/no-adopt strategy
8170 has_adopt = has_no_adopt = False
8171 for disk in self.op.disks:
8172 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8173 if constants.IDISK_ADOPT in disk:
8177 if has_adopt and has_no_adopt:
8178 raise errors.OpPrereqError("Either all disks are adopted or none is",
8181 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8182 raise errors.OpPrereqError("Disk adoption is not supported for the"
8183 " '%s' disk template" %
8184 self.op.disk_template,
8186 if self.op.iallocator is not None:
8187 raise errors.OpPrereqError("Disk adoption not allowed with an"
8188 " iallocator script", errors.ECODE_INVAL)
8189 if self.op.mode == constants.INSTANCE_IMPORT:
8190 raise errors.OpPrereqError("Disk adoption not allowed for"
8191 " instance import", errors.ECODE_INVAL)
8193 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8194 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8195 " but no 'adopt' parameter given" %
8196 self.op.disk_template,
8199 self.adopt_disks = has_adopt
8201 # instance name verification
8202 if self.op.name_check:
8203 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8204 self.op.instance_name = self.hostname1.name
8205 # used in CheckPrereq for ip ping check
8206 self.check_ip = self.hostname1.ip
8208 self.check_ip = None
8210 # file storage checks
8211 if (self.op.file_driver and
8212 not self.op.file_driver in constants.FILE_DRIVER):
8213 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8214 self.op.file_driver, errors.ECODE_INVAL)
8216 if self.op.disk_template == constants.DT_FILE:
8217 opcodes.RequireFileStorage()
8218 elif self.op.disk_template == constants.DT_SHARED_FILE:
8219 opcodes.RequireSharedFileStorage()
8221 ### Node/iallocator related checks
8222 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8224 if self.op.pnode is not None:
8225 if self.op.disk_template in constants.DTS_INT_MIRROR:
8226 if self.op.snode is None:
8227 raise errors.OpPrereqError("The networked disk templates need"
8228 " a mirror node", errors.ECODE_INVAL)
8230 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8232 self.op.snode = None
8234 self._cds = _GetClusterDomainSecret()
8236 if self.op.mode == constants.INSTANCE_IMPORT:
8237 # On import force_variant must be True, because if we forced it at
8238 # initial install, our only chance when importing it back is that it works again
8240 self.op.force_variant = True
8242 if self.op.no_install:
8243 self.LogInfo("No-installation mode has no effect during import")
8245 elif self.op.mode == constants.INSTANCE_CREATE:
8246 if self.op.os_type is None:
8247 raise errors.OpPrereqError("No guest OS specified",
8249 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8250 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8251 " installation" % self.op.os_type,
8253 if self.op.disk_template is None:
8254 raise errors.OpPrereqError("No disk template specified",
8257 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8258 # Check handshake to ensure both clusters have the same domain secret
8259 src_handshake = self.op.source_handshake
8260 if not src_handshake:
8261 raise errors.OpPrereqError("Missing source handshake",
8264 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8267 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8270 # Load and check source CA
8271 self.source_x509_ca_pem = self.op.source_x509_ca
8272 if not self.source_x509_ca_pem:
8273 raise errors.OpPrereqError("Missing source X509 CA",
8277 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8279 except OpenSSL.crypto.Error, err:
8280 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8281 (err, ), errors.ECODE_INVAL)
8283 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8284 if errcode is not None:
8285 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8288 self.source_x509_ca = cert
8290 src_instance_name = self.op.source_instance_name
8291 if not src_instance_name:
8292 raise errors.OpPrereqError("Missing source instance name",
8295 self.source_instance_name = \
8296 netutils.GetHostname(name=src_instance_name).name
8299 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8300 self.op.mode, errors.ECODE_INVAL)
8302 def ExpandNames(self):
8303 """ExpandNames for CreateInstance.
8305 Figure out the right locks for instance creation.
8308 self.needed_locks = {}
8310 instance_name = self.op.instance_name
8311 # this is just a preventive check, but someone might still add this
8312 # instance in the meantime, and creation will fail at lock-add time
8313 if instance_name in self.cfg.GetInstanceList():
8314 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8315 instance_name, errors.ECODE_EXISTS)
8317 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8319 if self.op.iallocator:
8320 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8322 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8323 nodelist = [self.op.pnode]
8324 if self.op.snode is not None:
8325 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8326 nodelist.append(self.op.snode)
8327 self.needed_locks[locking.LEVEL_NODE] = nodelist
8329 # in case of import lock the source node too
8330 if self.op.mode == constants.INSTANCE_IMPORT:
8331 src_node = self.op.src_node
8332 src_path = self.op.src_path
8334 if src_path is None:
8335 self.op.src_path = src_path = self.op.instance_name
8337 if src_node is None:
8338 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8339 self.op.src_node = None
8340 if os.path.isabs(src_path):
8341 raise errors.OpPrereqError("Importing an instance from a path"
8342 " requires a source node option",
8345 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8346 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8347 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8348 if not os.path.isabs(src_path):
8349 self.op.src_path = src_path = \
8350 utils.PathJoin(constants.EXPORT_DIR, src_path)
8352 def _RunAllocator(self):
8353 """Run the allocator based on input opcode.
8356 nics = [n.ToDict() for n in self.nics]
8357 ial = IAllocator(self.cfg, self.rpc,
8358 mode=constants.IALLOCATOR_MODE_ALLOC,
8359 name=self.op.instance_name,
8360 disk_template=self.op.disk_template,
8363 vcpus=self.be_full[constants.BE_VCPUS],
8364 memory=self.be_full[constants.BE_MEMORY],
8367 hypervisor=self.op.hypervisor,
8370 ial.Run(self.op.iallocator)
8373 raise errors.OpPrereqError("Can't compute nodes using"
8374 " iallocator '%s': %s" %
8375 (self.op.iallocator, ial.info),
8377 if len(ial.result) != ial.required_nodes:
8378 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8379 " of nodes (%s), required %s" %
8380 (self.op.iallocator, len(ial.result),
8381 ial.required_nodes), errors.ECODE_FAULT)
8382 self.op.pnode = ial.result[0]
8383 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8384 self.op.instance_name, self.op.iallocator,
8385 utils.CommaJoin(ial.result))
8386 if ial.required_nodes == 2:
8387 self.op.snode = ial.result[1]
8389 def BuildHooksEnv(self):
8392 This runs on master, primary and secondary nodes of the instance.
8396 "ADD_MODE": self.op.mode,
8398 if self.op.mode == constants.INSTANCE_IMPORT:
8399 env["SRC_NODE"] = self.op.src_node
8400 env["SRC_PATH"] = self.op.src_path
8401 env["SRC_IMAGES"] = self.src_images
8403 env.update(_BuildInstanceHookEnv(
8404 name=self.op.instance_name,
8405 primary_node=self.op.pnode,
8406 secondary_nodes=self.secondaries,
8407 status=self.op.start,
8408 os_type=self.op.os_type,
8409 memory=self.be_full[constants.BE_MEMORY],
8410 vcpus=self.be_full[constants.BE_VCPUS],
8411 nics=_NICListToTuple(self, self.nics),
8412 disk_template=self.op.disk_template,
8413 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8414 for d in self.disks],
8417 hypervisor_name=self.op.hypervisor,
8423 def BuildHooksNodes(self):
8424 """Build hooks nodes.
8427 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8430 def _ReadExportInfo(self):
8431 """Reads the export information from disk.
8433 It will override the opcode source node and path with the actual
8434 information, if these two were not specified before.
8436 @return: the export information
8439 assert self.op.mode == constants.INSTANCE_IMPORT
8441 src_node = self.op.src_node
8442 src_path = self.op.src_path
8444 if src_node is None:
8445 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8446 exp_list = self.rpc.call_export_list(locked_nodes)
8448 for node in exp_list:
8449 if exp_list[node].fail_msg:
8451 if src_path in exp_list[node].payload:
8453 self.op.src_node = src_node = node
8454 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8458 raise errors.OpPrereqError("No export found for relative path %s" %
8459 src_path, errors.ECODE_INVAL)
8461 _CheckNodeOnline(self, src_node)
8462 result = self.rpc.call_export_info(src_node, src_path)
8463 result.Raise("No export or invalid export found in dir %s" % src_path)
8465 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8466 if not export_info.has_section(constants.INISECT_EXP):
8467 raise errors.ProgrammerError("Corrupted export config",
8468 errors.ECODE_ENVIRON)
8470 ei_version = export_info.get(constants.INISECT_EXP, "version")
8471 if (int(ei_version) != constants.EXPORT_VERSION):
8472 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8473 (ei_version, constants.EXPORT_VERSION),
8474 errors.ECODE_ENVIRON)
8477 def _ReadExportParams(self, einfo):
8478 """Use export parameters as defaults.
8480 In case the opcode doesn't specify (as in override) some instance
8481 parameters, then try to use them from the export information, if present there.
8485 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8487 if self.op.disk_template is None:
8488 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8489 self.op.disk_template = einfo.get(constants.INISECT_INS,
8492 raise errors.OpPrereqError("No disk template specified and the export"
8493 " is missing the disk_template information",
8496 if not self.op.disks:
8497 if einfo.has_option(constants.INISECT_INS, "disk_count"):
8499 # TODO: import the disk iv_name too
8500 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8501 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8502 disks.append({constants.IDISK_SIZE: disk_sz})
8503 self.op.disks = disks
8505 raise errors.OpPrereqError("No disk info specified and the export"
8506 " is missing the disk information",
8509 if (not self.op.nics and
8510 einfo.has_option(constants.INISECT_INS, "nic_count")):
8512 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8514 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8515 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8520 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8521 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8523 if (self.op.hypervisor is None and
8524 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8525 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8527 if einfo.has_section(constants.INISECT_HYP):
8528 # use the export parameters but do not override the ones
8529 # specified by the user
8530 for name, value in einfo.items(constants.INISECT_HYP):
8531 if name not in self.op.hvparams:
8532 self.op.hvparams[name] = value
8534 if einfo.has_section(constants.INISECT_BEP):
8535 # use the parameters, without overriding
8536 for name, value in einfo.items(constants.INISECT_BEP):
8537 if name not in self.op.beparams:
8538 self.op.beparams[name] = value
8540 # try to read the parameters old style, from the main section
8541 for name in constants.BES_PARAMETERS:
8542 if (name not in self.op.beparams and
8543 einfo.has_option(constants.INISECT_INS, name)):
8544 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8546 if einfo.has_section(constants.INISECT_OSP):
8547 # use the parameters, without overriding
8548 for name, value in einfo.items(constants.INISECT_OSP):
8549 if name not in self.op.osparams:
8550 self.op.osparams[name] = value
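# Rough sketch (added comment) of the export file layout this method falls
# back to; the option names match the lookups above, the section headers
# are the assumed values of constants.INISECT_INS/_HYP/_BEP and the values
# are purely illustrative:
#
#   [instance]
#   disk_template = drbd
#   disk_count = 1
#   disk0_size = 10240
#   nic_count = 1
#   nic0_mac = aa:00:00:12:34:56
#   hypervisor = xen-pvm
#   [hypervisor]
#   kernel_path = /boot/vmlinuz
#   [backend]
#   memory = 512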
8552 def _RevertToDefaults(self, cluster):
8553 """Revert the instance parameters to the default values.
8557 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8558 for name in self.op.hvparams.keys():
8559 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8560 del self.op.hvparams[name]
8562 be_defs = cluster.SimpleFillBE({})
8563 for name in self.op.beparams.keys():
8564 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8565 del self.op.beparams[name]
8567 nic_defs = cluster.SimpleFillNIC({})
8568 for nic in self.op.nics:
8569 for name in constants.NICS_PARAMETERS:
8570 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8573 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8574 for name in self.op.osparams.keys():
8575 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8576 del self.op.osparams[name]
8578 def _CalculateFileStorageDir(self):
8579 """Calculate final instance file storage dir.
8582 # file storage dir calculation/check
8583 self.instance_file_storage_dir = None
8584 if self.op.disk_template in constants.DTS_FILEBASED:
8585 # build the full file storage dir path
8588 if self.op.disk_template == constants.DT_SHARED_FILE:
8589 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8591 get_fsd_fn = self.cfg.GetFileStorageDir
8593 cfg_storagedir = get_fsd_fn()
8594 if not cfg_storagedir:
8595 raise errors.OpPrereqError("Cluster file storage dir not defined")
8596 joinargs.append(cfg_storagedir)
8598 if self.op.file_storage_dir is not None:
8599 joinargs.append(self.op.file_storage_dir)
8601 joinargs.append(self.op.instance_name)
8603 # pylint: disable=W0142
8604 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
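# Example (added comment, illustrative values): with a cluster file storage
# dir of "/srv/ganeti/file-storage", op.file_storage_dir "customers" and
# instance name "inst1.example.com", the result is
# "/srv/ganeti/file-storage/customers/inst1.example.com"; without an
# explicit file_storage_dir the middle component is simply omitted.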
8606 def CheckPrereq(self):
8607 """Check prerequisites.
8610 self._CalculateFileStorageDir()
8612 if self.op.mode == constants.INSTANCE_IMPORT:
8613 export_info = self._ReadExportInfo()
8614 self._ReadExportParams(export_info)
8616 if (not self.cfg.GetVGName() and
8617 self.op.disk_template not in constants.DTS_NOT_LVM):
8618 raise errors.OpPrereqError("Cluster does not support lvm-based"
8619 " instances", errors.ECODE_STATE)
8621 if self.op.hypervisor is None:
8622 self.op.hypervisor = self.cfg.GetHypervisorType()
8624 cluster = self.cfg.GetClusterInfo()
8625 enabled_hvs = cluster.enabled_hypervisors
8626 if self.op.hypervisor not in enabled_hvs:
8627 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8628 " cluster (%s)" % (self.op.hypervisor,
8629 ",".join(enabled_hvs)),
8632 # Check tag validity
8633 for tag in self.op.tags:
8634 objects.TaggableObject.ValidateTag(tag)
8636 # check hypervisor parameter syntax (locally)
8637 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8638 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8640 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8641 hv_type.CheckParameterSyntax(filled_hvp)
8642 self.hv_full = filled_hvp
8643 # check that we don't specify global parameters on an instance
8644 _CheckGlobalHvParams(self.op.hvparams)
8646 # fill and remember the beparams dict
8647 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8648 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8650 # build os parameters
8651 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8653 # now that hvp/bep are in final format, let's reset to defaults,
8655 if self.op.identify_defaults:
8656 self._RevertToDefaults(cluster)
8660 for idx, nic in enumerate(self.op.nics):
8661 nic_mode_req = nic.get(constants.INIC_MODE, None)
8662 nic_mode = nic_mode_req
8663 if nic_mode is None:
8664 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8666 # in routed mode, for the first nic, the default ip is 'auto'
8667 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8668 default_ip_mode = constants.VALUE_AUTO
8670 default_ip_mode = constants.VALUE_NONE
8672 # ip validity checks
8673 ip = nic.get(constants.INIC_IP, default_ip_mode)
8674 if ip is None or ip.lower() == constants.VALUE_NONE:
8676 elif ip.lower() == constants.VALUE_AUTO:
8677 if not self.op.name_check:
8678 raise errors.OpPrereqError("IP address set to auto but name checks"
8679 " have been skipped",
8681 nic_ip = self.hostname1.ip
8683 if not netutils.IPAddress.IsValid(ip):
8684 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8688 # TODO: check the ip address for uniqueness
8689 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8690 raise errors.OpPrereqError("Routed nic mode requires an ip address",
8693 # MAC address verification
8694 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8695 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8696 mac = utils.NormalizeAndValidateMac(mac)
8699 self.cfg.ReserveMAC(mac, self.proc.GetECId())
8700 except errors.ReservationError:
8701 raise errors.OpPrereqError("MAC address %s already in use"
8702 " in cluster" % mac,
8703 errors.ECODE_NOTUNIQUE)
8705 # Build nic parameters
8706 link = nic.get(constants.INIC_LINK, None)
8709 nicparams[constants.NIC_MODE] = nic_mode_req
8711 nicparams[constants.NIC_LINK] = link
8713 check_params = cluster.SimpleFillNIC(nicparams)
8714 objects.NIC.CheckParameterSyntax(check_params)
8715 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8717 # disk checks/pre-build
8718 default_vg = self.cfg.GetVGName()
8720 for disk in self.op.disks:
8721 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8722 if mode not in constants.DISK_ACCESS_SET:
8723 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8724 mode, errors.ECODE_INVAL)
8725 size = disk.get(constants.IDISK_SIZE, None)
8727 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8730 except (TypeError, ValueError):
8731 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8734 data_vg = disk.get(constants.IDISK_VG, default_vg)
8736 constants.IDISK_SIZE: size,
8737 constants.IDISK_MODE: mode,
8738 constants.IDISK_VG: data_vg,
8739 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8741 if constants.IDISK_ADOPT in disk:
8742 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8743 self.disks.append(new_disk)
8745 if self.op.mode == constants.INSTANCE_IMPORT:
8747 # Check that the new instance doesn't have less disks than the export
8748 instance_disks = len(self.disks)
8749 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8750 if instance_disks < export_disks:
8751 raise errors.OpPrereqError("Not enough disks to import."
8752 " (instance: %d, export: %d)" %
8753 (instance_disks, export_disks),
8757 for idx in range(export_disks):
8758 option = "disk%d_dump" % idx
8759 if export_info.has_option(constants.INISECT_INS, option):
8760 # FIXME: are the old os-es, disk sizes, etc. useful?
8761 export_name = export_info.get(constants.INISECT_INS, option)
8762 image = utils.PathJoin(self.op.src_path, export_name)
8763 disk_images.append(image)
8765 disk_images.append(False)
8767 self.src_images = disk_images
8769 old_name = export_info.get(constants.INISECT_INS, "name")
8771 exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8772 except (TypeError, ValueError), err:
8773 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8774 " an integer: %s" % str(err),
8776 if self.op.instance_name == old_name:
8777 for idx, nic in enumerate(self.nics):
8778 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8779 nic_mac_ini = "nic%d_mac" % idx
8780 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8782 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8784 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8785 if self.op.ip_check:
8786 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8787 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8788 (self.check_ip, self.op.instance_name),
8789 errors.ECODE_NOTUNIQUE)
8791 #### mac address generation
8792 # By generating here the mac address both the allocator and the hooks get
8793 # the real final mac address rather than the 'auto' or 'generate' value.
8794 # There is a race condition between the generation and the instance object
8795 # creation, which means that we know the mac is valid now, but we're not
8796 # sure it will be when we actually add the instance. If things go bad
8797 # adding the instance will abort because of a duplicate mac, and the
8798 # creation job will fail.
8799 for nic in self.nics:
8800 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8801 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8805 if self.op.iallocator is not None:
8806 self._RunAllocator()
8808 #### node related checks
8810 # check primary node
8811 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8812 assert self.pnode is not None, \
8813 "Cannot retrieve locked node %s" % self.op.pnode
8815 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8816 pnode.name, errors.ECODE_STATE)
8818 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8819 pnode.name, errors.ECODE_STATE)
8820 if not pnode.vm_capable:
8821 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8822 " '%s'" % pnode.name, errors.ECODE_STATE)
8824 self.secondaries = []
8826 # mirror node verification
8827 if self.op.disk_template in constants.DTS_INT_MIRROR:
8828 if self.op.snode == pnode.name:
8829 raise errors.OpPrereqError("The secondary node cannot be the"
8830 " primary node", errors.ECODE_INVAL)
8831 _CheckNodeOnline(self, self.op.snode)
8832 _CheckNodeNotDrained(self, self.op.snode)
8833 _CheckNodeVmCapable(self, self.op.snode)
8834 self.secondaries.append(self.op.snode)
8836 nodenames = [pnode.name] + self.secondaries
8838 if not self.adopt_disks:
8839 # Check lv size requirements, if not adopting
8840 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8841 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8843 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8844 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8845 disk[constants.IDISK_ADOPT])
8846 for disk in self.disks])
8847 if len(all_lvs) != len(self.disks):
8848 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8850 for lv_name in all_lvs:
8852 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
8853 # to ReserveLV use the same syntax
8854 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8855 except errors.ReservationError:
8856 raise errors.OpPrereqError("LV named %s used by another instance" %
8857 lv_name, errors.ECODE_NOTUNIQUE)
8859 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8860 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8862 node_lvs = self.rpc.call_lv_list([pnode.name],
8863 vg_names.payload.keys())[pnode.name]
8864 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8865 node_lvs = node_lvs.payload
8867 delta = all_lvs.difference(node_lvs.keys())
8869 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8870 utils.CommaJoin(delta),
8872 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8874 raise errors.OpPrereqError("Online logical volumes found, cannot"
8875 " adopt: %s" % utils.CommaJoin(online_lvs),
8877 # update the size of disk based on what is found
8878 for dsk in self.disks:
8879 dsk[constants.IDISK_SIZE] = \
8880 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8881 dsk[constants.IDISK_ADOPT])][0]))
8883 elif self.op.disk_template == constants.DT_BLOCK:
8884 # Normalize and de-duplicate device paths
8885 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8886 for disk in self.disks])
8887 if len(all_disks) != len(self.disks):
8888 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8890 baddisks = [d for d in all_disks
8891 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8893 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8894 " cannot be adopted" %
8895 (", ".join(baddisks),
8896 constants.ADOPTABLE_BLOCKDEV_ROOT),
8899 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8900 list(all_disks))[pnode.name]
8901 node_disks.Raise("Cannot get block device information from node %s" %
8903 node_disks = node_disks.payload
8904 delta = all_disks.difference(node_disks.keys())
8906 raise errors.OpPrereqError("Missing block device(s): %s" %
8907 utils.CommaJoin(delta),
8909 for dsk in self.disks:
8910 dsk[constants.IDISK_SIZE] = \
8911 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8913 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8915 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8916 # check OS parameters (remotely)
8917 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8919 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8921 # memory check on primary node
8923 _CheckNodeFreeMemory(self, self.pnode.name,
8924 "creating instance %s" % self.op.instance_name,
8925 self.be_full[constants.BE_MEMORY],
8928 self.dry_run_result = list(nodenames)
8930 def Exec(self, feedback_fn):
8931 """Create and add the instance to the cluster.
8934 instance = self.op.instance_name
8935 pnode_name = self.pnode.name
8937 ht_kind = self.op.hypervisor
8938 if ht_kind in constants.HTS_REQ_PORT:
8939 network_port = self.cfg.AllocatePort()
8943 disks = _GenerateDiskTemplate(self,
8944 self.op.disk_template,
8945 instance, pnode_name,
8948 self.instance_file_storage_dir,
8949 self.op.file_driver,
8953 iobj = objects.Instance(name=instance, os=self.op.os_type,
8954 primary_node=pnode_name,
8955 nics=self.nics, disks=disks,
8956 disk_template=self.op.disk_template,
8958 network_port=network_port,
8959 beparams=self.op.beparams,
8960 hvparams=self.op.hvparams,
8961 hypervisor=self.op.hypervisor,
8962 osparams=self.op.osparams,
8966 for tag in self.op.tags:
8969 if self.adopt_disks:
8970 if self.op.disk_template == constants.DT_PLAIN:
8971 # rename LVs to the newly-generated names; we need to construct
8972 # 'fake' LV disks with the old data, plus the new unique_id
8973 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8975 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8976 rename_to.append(t_dsk.logical_id)
8977 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8978 self.cfg.SetDiskID(t_dsk, pnode_name)
8979 result = self.rpc.call_blockdev_rename(pnode_name,
8980 zip(tmp_disks, rename_to))
8981 result.Raise("Failed to rename adopted LVs")
8983 feedback_fn("* creating instance disks...")
8985 _CreateDisks(self, iobj)
8986 except errors.OpExecError:
8987 self.LogWarning("Device creation failed, reverting...")
8989 _RemoveDisks(self, iobj)
8991 self.cfg.ReleaseDRBDMinors(instance)
8994 feedback_fn("adding instance %s to cluster config" % instance)
8996 self.cfg.AddInstance(iobj, self.proc.GetECId())
8998 # Declare that we don't want to remove the instance lock anymore, as we've
8999 # added the instance to the config
9000 del self.remove_locks[locking.LEVEL_INSTANCE]
9002 if self.op.mode == constants.INSTANCE_IMPORT:
9003 # Release unused nodes
9004 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9007 _ReleaseLocks(self, locking.LEVEL_NODE)
9010 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9011 feedback_fn("* wiping instance disks...")
9013 _WipeDisks(self, iobj)
9014 except errors.OpExecError, err:
9015 logging.exception("Wiping disks failed")
9016 self.LogWarning("Wiping instance disks failed (%s)", err)
9020 # Something is already wrong with the disks, don't do anything else
9022 elif self.op.wait_for_sync:
9023 disk_abort = not _WaitForSync(self, iobj)
9024 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9025 # make sure the disks are not degraded (still sync-ing is ok)
9026 feedback_fn("* checking mirrors status")
9027 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9032 _RemoveDisks(self, iobj)
9033 self.cfg.RemoveInstance(iobj.name)
9034 # Make sure the instance lock gets removed
9035 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9036 raise errors.OpExecError("There are some degraded disks for"
9039 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9040 if self.op.mode == constants.INSTANCE_CREATE:
9041 if not self.op.no_install:
9042 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9043 not self.op.wait_for_sync)
9045 feedback_fn("* pausing disk sync to install instance OS")
9046 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9048 for idx, success in enumerate(result.payload):
9050 logging.warn("pause-sync of instance %s for disk %d failed",
9053 feedback_fn("* running the instance OS create scripts...")
9054 # FIXME: pass debug option from opcode to backend
9056 self.rpc.call_instance_os_add(pnode_name, iobj, False,
9057 self.op.debug_level)
9059 feedback_fn("* resuming disk sync")
9060 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9062 for idx, success in enumerate(result.payload):
9064 logging.warn("resume-sync of instance %s for disk %d failed",
9067 os_add_result.Raise("Could not add os for instance %s"
9068 " on node %s" % (instance, pnode_name))
9070 elif self.op.mode == constants.INSTANCE_IMPORT:
9071 feedback_fn("* running the instance OS import scripts...")
9075 for idx, image in enumerate(self.src_images):
9079 # FIXME: pass debug option from opcode to backend
9080 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9081 constants.IEIO_FILE, (image, ),
9082 constants.IEIO_SCRIPT,
9083 (iobj.disks[idx], idx),
9085 transfers.append(dt)
9088 masterd.instance.TransferInstanceData(self, feedback_fn,
9089 self.op.src_node, pnode_name,
9090 self.pnode.secondary_ip,
9092 if not compat.all(import_result):
9093 self.LogWarning("Some disks for instance %s on node %s were not"
9094 " imported successfully" % (instance, pnode_name))
9096 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9097 feedback_fn("* preparing remote import...")
9098 # The source cluster will stop the instance before attempting to make a
9099 # connection. In some cases stopping an instance can take a long time,
9100 # hence the shutdown timeout is added to the connection timeout.
9101 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9102 self.op.source_shutdown_timeout)
9103 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
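# Arithmetic example (added comment; the constant's value is assumed):
# with RIE_CONNECT_TIMEOUT = 60 seconds and a requested source shutdown
# timeout of 120 seconds, the remote end gets 180 seconds to establish the
# import connection.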
9105 assert iobj.primary_node == self.pnode.name
9107 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9108 self.source_x509_ca,
9109 self._cds, timeouts)
9110 if not compat.all(disk_results):
9111 # TODO: Should the instance still be started, even if some disks
9112 # failed to import (valid for local imports, too)?
9113 self.LogWarning("Some disks for instance %s on node %s were not"
9114 " imported successfully" % (instance, pnode_name))
9116 # Run rename script on newly imported instance
9117 assert iobj.name == instance
9118 feedback_fn("Running rename script for %s" % instance)
9119 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9120 self.source_instance_name,
9121 self.op.debug_level)
9123 self.LogWarning("Failed to run rename script for %s on node"
9124 " %s: %s" % (instance, pnode_name, result.fail_msg))
9127 # also checked in the prereq part
9128 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9132 iobj.admin_up = True
9133 self.cfg.Update(iobj, feedback_fn)
9134 logging.info("Starting instance %s on node %s", instance, pnode_name)
9135 feedback_fn("* starting instance...")
9136 result = self.rpc.call_instance_start(pnode_name, iobj,
9138 result.Raise("Could not start instance")
9140 return list(iobj.all_nodes)
9143 class LUInstanceConsole(NoHooksLU):
9144 """Connect to an instance's console.
9146 This is somewhat special in that it returns the command line that
9147 you need to run on the master node in order to connect to the console.
9153 def ExpandNames(self):
9154 self._ExpandAndLockInstance()
9156 def CheckPrereq(self):
9157 """Check prerequisites.
9159 This checks that the instance is in the cluster.
9162 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9163 assert self.instance is not None, \
9164 "Cannot retrieve locked instance %s" % self.op.instance_name
9165 _CheckNodeOnline(self, self.instance.primary_node)
9167 def Exec(self, feedback_fn):
9168 """Connect to the console of an instance
9171 instance = self.instance
9172 node = instance.primary_node
9174 node_insts = self.rpc.call_instance_list([node],
9175 [instance.hypervisor])[node]
9176 node_insts.Raise("Can't get node information from %s" % node)
9178 if instance.name not in node_insts.payload:
9179 if instance.admin_up:
9180 state = constants.INSTST_ERRORDOWN
9182 state = constants.INSTST_ADMINDOWN
9183 raise errors.OpExecError("Instance %s is not running (state %s)" %
9184 (instance.name, state))
9186 logging.debug("Connecting to console of %s on %s", instance.name, node)
9188 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9191 def _GetInstanceConsole(cluster, instance):
9192 """Returns console information for an instance.
9194 @type cluster: L{objects.Cluster}
9195 @type instance: L{objects.Instance}
9199 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9200 # beparams and hvparams are passed separately, to avoid editing the
9201 # instance and then saving the defaults in the instance itself.
9202 hvparams = cluster.FillHV(instance)
9203 beparams = cluster.FillBE(instance)
9204 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9206 assert console.instance == instance.name
9207 assert console.Validate()
9209 return console.ToDict()
9212 class LUInstanceReplaceDisks(LogicalUnit):
9213 """Replace the disks of an instance.
9216 HPATH = "mirrors-replace"
9217 HTYPE = constants.HTYPE_INSTANCE
9220 def CheckArguments(self):
9221 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9224 def ExpandNames(self):
9225 self._ExpandAndLockInstance()
9227 assert locking.LEVEL_NODE not in self.needed_locks
9228 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9230 assert self.op.iallocator is None or self.op.remote_node is None, \
9231 "Conflicting options"
9233 if self.op.remote_node is not None:
9234 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9236 # Warning: do not remove the locking of the new secondary here
9237 # unless DRBD8.AddChildren is changed to work in parallel;
9238 # currently it doesn't since parallel invocations of
9239 # FindUnusedMinor will conflict
9240 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9241 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9243 self.needed_locks[locking.LEVEL_NODE] = []
9244 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9246 if self.op.iallocator is not None:
9247 # iallocator will select a new node in the same group
9248 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9250 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9251 self.op.iallocator, self.op.remote_node,
9252 self.op.disks, False, self.op.early_release)
9254 self.tasklets = [self.replacer]
9256 def DeclareLocks(self, level):
9257 if level == locking.LEVEL_NODEGROUP:
9258 assert self.op.remote_node is None
9259 assert self.op.iallocator is not None
9260 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9262 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9263 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9264 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9266 elif level == locking.LEVEL_NODE:
9267 if self.op.iallocator is not None:
9268 assert self.op.remote_node is None
9269 assert not self.needed_locks[locking.LEVEL_NODE]
9271 # Lock member nodes of all locked groups
9272 self.needed_locks[locking.LEVEL_NODE] = [node_name
9273 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9274 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9276 self._LockInstancesNodes()
9278 def BuildHooksEnv(self):
9281 This runs on the master, the primary and all the secondaries.
9284 instance = self.replacer.instance
9286 "MODE": self.op.mode,
9287 "NEW_SECONDARY": self.op.remote_node,
9288 "OLD_SECONDARY": instance.secondary_nodes[0],
9290 env.update(_BuildInstanceHookEnvByObject(self, instance))
9293 def BuildHooksNodes(self):
9294 """Build hooks nodes.
9297 instance = self.replacer.instance
9299 self.cfg.GetMasterNode(),
9300 instance.primary_node,
9302 if self.op.remote_node is not None:
9303 nl.append(self.op.remote_node)
9306 def CheckPrereq(self):
9307 """Check prerequisites.
9310 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9311 self.op.iallocator is None)
9313 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9315 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9317 return LogicalUnit.CheckPrereq(self)
9320 class TLReplaceDisks(Tasklet):
9321 """Replaces disks for an instance.
9323 Note: Locking is not within the scope of this class.
9326 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9327 disks, delay_iallocator, early_release):
9328 """Initializes this class.
9331 Tasklet.__init__(self, lu)
9334 self.instance_name = instance_name
9336 self.iallocator_name = iallocator_name
9337 self.remote_node = remote_node
9339 self.delay_iallocator = delay_iallocator
9340 self.early_release = early_release
9343 self.instance = None
9344 self.new_node = None
9345 self.target_node = None
9346 self.other_node = None
9347 self.remote_node_info = None
9348 self.node_secondary_ip = None
9351 def CheckArguments(mode, remote_node, iallocator):
9352 """Helper function for users of this class.
9355 # check for valid parameter combination
9356 if mode == constants.REPLACE_DISK_CHG:
9357 if remote_node is None and iallocator is None:
9358 raise errors.OpPrereqError("When changing the secondary either an"
9359 " iallocator script must be used or the"
9360 " new node given", errors.ECODE_INVAL)
9362 if remote_node is not None and iallocator is not None:
9363 raise errors.OpPrereqError("Give either the iallocator or the new"
9364 " secondary, not both", errors.ECODE_INVAL)
9366 elif remote_node is not None or iallocator is not None:
9367 # Not replacing the secondary
9368 raise errors.OpPrereqError("The iallocator and new node options can"
9369 " only be used when changing the"
9370 " secondary node", errors.ECODE_INVAL)
9373 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9374 """Compute a new secondary node using an IAllocator.
9377 ial = IAllocator(lu.cfg, lu.rpc,
9378 mode=constants.IALLOCATOR_MODE_RELOC,
9380 relocate_from=list(relocate_from))
9382 ial.Run(iallocator_name)
9385 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9386 " %s" % (iallocator_name, ial.info),
9389 if len(ial.result) != ial.required_nodes:
9390 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9391 " of nodes (%s), required %s" %
9393 len(ial.result), ial.required_nodes),
9396 remote_node_name = ial.result[0]
9398 lu.LogInfo("Selected new secondary for instance '%s': %s",
9399 instance_name, remote_node_name)
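# Note: ial.result is the list of node names chosen by the allocator; the
# length check above guarantees it matches ial.required_nodes, and only the
# first entry is used as the new secondary.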
9401 return remote_node_name
9403 def _FindFaultyDisks(self, node_name):
9404 """Wrapper for L{_FindFaultyInstanceDisks}.
9407 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9410 def _CheckDisksActivated(self, instance):
9411 """Checks if the instance disks are activated.
9413 @param instance: The instance to check disks
9414 @return: True if they are activated, False otherwise
9417 nodes = instance.all_nodes
9419 for idx, dev in enumerate(instance.disks):
9421 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9422 self.cfg.SetDiskID(dev, node)
9424 result = self.rpc.call_blockdev_find(node, dev)
9428 elif result.fail_msg or not result.payload:
9433 def CheckPrereq(self):
9434 """Check prerequisites.
9436 This checks that the instance is in the cluster.
9439 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9440 assert instance is not None, \
9441 "Cannot retrieve locked instance %s" % self.instance_name
9443 if instance.disk_template != constants.DT_DRBD8:
9444 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9445 " instances", errors.ECODE_INVAL)
9447 if len(instance.secondary_nodes) != 1:
9448 raise errors.OpPrereqError("The instance has a strange layout,"
9449 " expected one secondary but found %d" %
9450 len(instance.secondary_nodes),
9453 if not self.delay_iallocator:
9454 self._CheckPrereq2()
9456 def _CheckPrereq2(self):
9457 """Check prerequisites, second part.
9459 This function should always be part of CheckPrereq. It was separated and is
9460 now called from Exec because during node evacuation iallocator was only
9461 called with an unmodified cluster model, not taking planned changes into
account.

"""
9465 instance = self.instance
9466 secondary_node = instance.secondary_nodes[0]
9468 if self.iallocator_name is None:
9469 remote_node = self.remote_node
9471 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9472 instance.name, instance.secondary_nodes)
9474 if remote_node is None:
9475 self.remote_node_info = None
9477 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9478 "Remote node '%s' is not locked" % remote_node
9480 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9481 assert self.remote_node_info is not None, \
9482 "Cannot retrieve locked node %s" % remote_node
9484 if remote_node == self.instance.primary_node:
9485 raise errors.OpPrereqError("The specified node is the primary node of"
9486 " the instance", errors.ECODE_INVAL)
9488 if remote_node == secondary_node:
9489 raise errors.OpPrereqError("The specified node is already the"
9490 " secondary node of the instance",
9493 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9494 constants.REPLACE_DISK_CHG):
9495 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9498 if self.mode == constants.REPLACE_DISK_AUTO:
9499 if not self._CheckDisksActivated(instance):
9500 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9501 " first" % self.instance_name,
9503 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9504 faulty_secondary = self._FindFaultyDisks(secondary_node)
9506 if faulty_primary and faulty_secondary:
9507 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9508 " one node and can not be repaired"
9509 " automatically" % self.instance_name,
9513 self.disks = faulty_primary
9514 self.target_node = instance.primary_node
9515 self.other_node = secondary_node
9516 check_nodes = [self.target_node, self.other_node]
9517 elif faulty_secondary:
9518 self.disks = faulty_secondary
9519 self.target_node = secondary_node
9520 self.other_node = instance.primary_node
9521 check_nodes = [self.target_node, self.other_node]
9527 # Non-automatic modes
9528 if self.mode == constants.REPLACE_DISK_PRI:
9529 self.target_node = instance.primary_node
9530 self.other_node = secondary_node
9531 check_nodes = [self.target_node, self.other_node]
9533 elif self.mode == constants.REPLACE_DISK_SEC:
9534 self.target_node = secondary_node
9535 self.other_node = instance.primary_node
9536 check_nodes = [self.target_node, self.other_node]
9538 elif self.mode == constants.REPLACE_DISK_CHG:
9539 self.new_node = remote_node
9540 self.other_node = instance.primary_node
9541 self.target_node = secondary_node
9542 check_nodes = [self.new_node, self.other_node]
9544 _CheckNodeNotDrained(self.lu, remote_node)
9545 _CheckNodeVmCapable(self.lu, remote_node)
9547 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9548 assert old_node_info is not None
9549 if old_node_info.offline and not self.early_release:
9550 # doesn't make sense to delay the release
9551 self.early_release = True
9552 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9553 " early-release mode", secondary_node)
9556 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9559 # If not specified all disks should be replaced
9561 self.disks = range(len(self.instance.disks))
9563 for node in check_nodes:
9564 _CheckNodeOnline(self.lu, node)
9566 touched_nodes = frozenset(node_name for node_name in [self.new_node,
9569 if node_name is not None)
9571 # Release unneeded node locks
9572 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9574 # Release any owned node group
9575 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9576 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9578 # Check whether disks are valid
9579 for disk_idx in self.disks:
9580 instance.FindDisk(disk_idx)
9582 # Get secondary node IP addresses
9583 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9584 in self.cfg.GetMultiNodeInfo(touched_nodes))
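# node_secondary_ip maps each touched node name to that node's secondary
# (replication) IP address; it is used later by the drbd_disconnect_net and
# drbd_attach_net RPC calls when the secondary is changed.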
9586 def Exec(self, feedback_fn):
9587 """Execute disk replacement.
9589 This dispatches the disk replacement to the appropriate handler.
9592 if self.delay_iallocator:
9593 self._CheckPrereq2()
9596 # Verify owned locks before starting operation
9597 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9598 assert set(owned_nodes) == set(self.node_secondary_ip), \
9599 ("Incorrect node locks, owning %s, expected %s" %
9600 (owned_nodes, self.node_secondary_ip.keys()))
9602 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9603 assert list(owned_instances) == [self.instance_name], \
9604 "Instance '%s' not locked" % self.instance_name
9606 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9607 "Should not own any node group lock at this point"
9610 feedback_fn("No disks need replacement")
9613 feedback_fn("Replacing disk(s) %s for %s" %
9614 (utils.CommaJoin(self.disks), self.instance.name))
9616 activate_disks = (not self.instance.admin_up)
9618 # Activate the instance disks if we're replacing them on a down instance
if activate_disks:
9620 _StartInstanceDisks(self.lu, self.instance, True)
9623 # Should we replace the secondary node?
9624 if self.new_node is not None:
9625 fn = self._ExecDrbd8Secondary
9627 fn = self._ExecDrbd8DiskOnly
9629 result = fn(feedback_fn)
9631 # Deactivate the instance disks if we're replacing them on a
# down instance
if activate_disks:
9634 _SafeShutdownInstanceDisks(self.lu, self.instance)
9637 # Verify owned locks
9638 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9639 nodes = frozenset(self.node_secondary_ip)
9640 assert ((self.early_release and not owned_nodes) or
9641 (not self.early_release and not (set(owned_nodes) - nodes))), \
9642 ("Not owning the correct locks, early_release=%s, owned=%r,"
9643 " nodes=%r" % (self.early_release, owned_nodes, nodes))
9647 def _CheckVolumeGroup(self, nodes):
9648 self.lu.LogInfo("Checking volume groups")
9650 vgname = self.cfg.GetVGName()
9652 # Make sure volume group exists on all involved nodes
9653 results = self.rpc.call_vg_list(nodes)
9655 raise errors.OpExecError("Can't list volume groups on the nodes")
9659 res.Raise("Error checking node %s" % node)
9660 if vgname not in res.payload:
9661 raise errors.OpExecError("Volume group '%s' not found on node %s" %
9664 def _CheckDisksExistence(self, nodes):
9665 # Check disk existence
9666 for idx, dev in enumerate(self.instance.disks):
9667 if idx not in self.disks:
9671 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9672 self.cfg.SetDiskID(dev, node)
9674 result = self.rpc.call_blockdev_find(node, dev)
9676 msg = result.fail_msg
9677 if msg or not result.payload:
9679 msg = "disk not found"
9680 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9683 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9684 for idx, dev in enumerate(self.instance.disks):
9685 if idx not in self.disks:
9688 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9691 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9693 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9694 " replace disks for instance %s" %
9695 (node_name, self.instance.name))
9697 def _CreateNewStorage(self, node_name):
9698 """Create new storage on the primary or secondary node.
9700 This is only used for same-node replaces, not for changing the
9701 secondary node, hence we don't want to modify the existing disk.
9706 for idx, dev in enumerate(self.instance.disks):
9707 if idx not in self.disks:
9710 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9712 self.cfg.SetDiskID(dev, node_name)
9714 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9715 names = _GenerateUniqueNames(self.lu, lv_names)
9717 vg_data = dev.children[0].logical_id[0]
9718 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9719 logical_id=(vg_data, names[0]))
9720 vg_meta = dev.children[1].logical_id[0]
9721 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9722 logical_id=(vg_meta, names[1]))
9724 new_lvs = [lv_data, lv_meta]
9725 old_lvs = [child.Copy() for child in dev.children]
9726 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9728 # we pass force_create=True to force the LVM creation
9729 for new_lv in new_lvs:
9730 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9731 _GetInstanceInfoText(self.instance), False)

return iv_names
9735 def _CheckDevices(self, node_name, iv_names):
9736 for name, (dev, _, _) in iv_names.iteritems():
9737 self.cfg.SetDiskID(dev, node_name)
9739 result = self.rpc.call_blockdev_find(node_name, dev)
9741 msg = result.fail_msg
9742 if msg or not result.payload:
9744 msg = "disk not found"
9745 raise errors.OpExecError("Can't find DRBD device %s: %s" %
9748 if result.payload.is_degraded:
9749 raise errors.OpExecError("DRBD device %s is degraded!" % name)
9751 def _RemoveOldStorage(self, node_name, iv_names):
9752 for name, (_, old_lvs, _) in iv_names.iteritems():
9753 self.lu.LogInfo("Remove logical volumes for %s" % name)
9756 self.cfg.SetDiskID(lv, node_name)
9758 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9760 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9761 hint="remove unused LVs manually")
9763 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9764 """Replace a disk on the primary or secondary for DRBD 8.
9766 The algorithm for replace is quite complicated:
9768 1. for each disk to be replaced:
9770 1. create new LVs on the target node with unique names
9771 1. detach old LVs from the drbd device
9772 1. rename old LVs to name_replaced.<time_t>
9773 1. rename new LVs to old LVs
9774 1. attach the new LVs (with the old names now) to the drbd device
9776 1. wait for sync across all devices
9778 1. for each modified disk:
9780 1. remove old LVs (which have the name name_replaced.<time_t>)
9782 Failures are not very well handled.

"""
steps_total = 6
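# Outline of the rename dance performed below for each replaced disk:
#   1. new data/meta LVs are created under unique temporary names
#   2. the old LVs are detached and renamed to <old_name>_replaced-<time_t>
#   3. the new LVs are renamed to the now-free old names and re-attached
#   4. once the device has resynced, the "_replaced" LVs are removed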
9787 # Step: check device activation
9788 self.lu.LogStep(1, steps_total, "Check device existence")
9789 self._CheckDisksExistence([self.other_node, self.target_node])
9790 self._CheckVolumeGroup([self.target_node, self.other_node])
9792 # Step: check other node consistency
9793 self.lu.LogStep(2, steps_total, "Check peer consistency")
9794 self._CheckDisksConsistency(self.other_node,
9795 self.other_node == self.instance.primary_node,
9798 # Step: create new storage
9799 self.lu.LogStep(3, steps_total, "Allocate new storage")
9800 iv_names = self._CreateNewStorage(self.target_node)
9802 # Step: for each lv, detach+rename*2+attach
9803 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9804 for dev, old_lvs, new_lvs in iv_names.itervalues():
9805 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9807 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9809 result.Raise("Can't detach drbd from local storage on node"
9810 " %s for device %s" % (self.target_node, dev.iv_name))
9812 #cfg.Update(instance)
9814 # ok, we created the new LVs, so now we know we have the needed
9815 # storage; as such, we proceed on the target node to rename
9816 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9817 # using the assumption that logical_id == physical_id (which in
9818 # turn is the unique_id on that node)
9820 # FIXME(iustin): use a better name for the replaced LVs
9821 temp_suffix = int(time.time())
9822 ren_fn = lambda d, suff: (d.physical_id[0],
9823 d.physical_id[1] + "_replaced-%s" % suff)
9825 # Build the rename list based on what LVs exist on the node
9826 rename_old_to_new = []
9827 for to_ren in old_lvs:
9828 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9829 if not result.fail_msg and result.payload:
9831 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9833 self.lu.LogInfo("Renaming the old LVs on the target node")
9834 result = self.rpc.call_blockdev_rename(self.target_node,
9836 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9838 # Now we rename the new LVs to the old LVs
9839 self.lu.LogInfo("Renaming the new LVs on the target node")
9840 rename_new_to_old = [(new, old.physical_id)
9841 for old, new in zip(old_lvs, new_lvs)]
9842 result = self.rpc.call_blockdev_rename(self.target_node,
9844 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9846 # Intermediate steps of in memory modifications
9847 for old, new in zip(old_lvs, new_lvs):
9848 new.logical_id = old.logical_id
9849 self.cfg.SetDiskID(new, self.target_node)
9851 # We need to modify old_lvs so that removal later removes the
9852 # right LVs, not the newly added ones; note that old_lvs is a
9854 for disk in old_lvs:
9855 disk.logical_id = ren_fn(disk, temp_suffix)
9856 self.cfg.SetDiskID(disk, self.target_node)
9858 # Now that the new lvs have the old name, we can add them to the device
9859 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9860 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9862 msg = result.fail_msg
9864 for new_lv in new_lvs:
9865 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9868 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9869 hint=("cleanup manually the unused logical"
9871 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9874 if self.early_release:
9875 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9877 self._RemoveOldStorage(self.target_node, iv_names)
9878 # WARNING: we release both node locks here, do not do other RPCs
9879 # than WaitForSync to the primary node
9880 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9881 names=[self.target_node, self.other_node])
9884 # This can fail as the old devices are degraded and _WaitForSync
9885 # does a combined result over all disks, so we don't check its return value
9886 self.lu.LogStep(cstep, steps_total, "Sync devices")
9888 _WaitForSync(self.lu, self.instance)
9890 # Check all devices manually
9891 self._CheckDevices(self.instance.primary_node, iv_names)
9893 # Step: remove old storage
9894 if not self.early_release:
9895 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9897 self._RemoveOldStorage(self.target_node, iv_names)
9899 def _ExecDrbd8Secondary(self, feedback_fn):
9900 """Replace the secondary node for DRBD 8.
9902 The algorithm for replace is quite complicated:
9903 - for all disks of the instance:
9904 - create new LVs on the new node with same names
9905 - shutdown the drbd device on the old secondary
9906 - disconnect the drbd network on the primary
9907 - create the drbd device on the new secondary
9908 - network attach the drbd on the primary, using an artifice:
9909 the drbd code for Attach() will connect to the network if it
9910 finds a device which is connected to the good local disks but
not network enabled
9912 - wait for sync across all devices
9913 - remove all disks from the old secondary
9915 Failures are not very well handled.

"""
steps_total = 6
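# The "artifice" mentioned above works roughly as follows: the DRBD device is
# first created on the new secondary with port=None (no networking), the
# primary's disks are switched to standalone via drbd_disconnect_net, the
# instance configuration is updated to point at the new secondary, and finally
# drbd_attach_net re-enables networking so the primary connects to the new
# node and resyncs.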
9920 pnode = self.instance.primary_node
9922 # Step: check device activation
9923 self.lu.LogStep(1, steps_total, "Check device existence")
9924 self._CheckDisksExistence([self.instance.primary_node])
9925 self._CheckVolumeGroup([self.instance.primary_node])
9927 # Step: check other node consistency
9928 self.lu.LogStep(2, steps_total, "Check peer consistency")
9929 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9931 # Step: create new storage
9932 self.lu.LogStep(3, steps_total, "Allocate new storage")
9933 for idx, dev in enumerate(self.instance.disks):
9934 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9935 (self.new_node, idx))
9936 # we pass force_create=True to force LVM creation
9937 for new_lv in dev.children:
9938 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9939 _GetInstanceInfoText(self.instance), False)
9941 # Step 4: drbd minors and drbd setup changes
9942 # after this, we must manually remove the drbd minors on both the
9943 # error and the success paths
9944 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9945 minors = self.cfg.AllocateDRBDMinor([self.new_node
9946 for dev in self.instance.disks],
9948 logging.debug("Allocated minors %r", minors)
9951 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9952 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9953 (self.new_node, idx))
9954 # create new devices on new_node; note that we create two IDs:
9955 # one without port, so the drbd will be activated without
9956 # networking information on the new node at this stage, and one
9957 # with network, for the latter activation in step 4
9958 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9959 if self.instance.primary_node == o_node1:
9962 assert self.instance.primary_node == o_node2, "Three-node instance?"
9965 new_alone_id = (self.instance.primary_node, self.new_node, None,
9966 p_minor, new_minor, o_secret)
9967 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9968 p_minor, new_minor, o_secret)
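# A DRBD8 logical_id is (node_A, node_B, port, minor_A, minor_B, secret);
# new_alone_id uses port=None so the device can be brought up on the new node
# without any networking yet, while new_net_id carries the real port for the
# later network attach.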
9970 iv_names[idx] = (dev, dev.children, new_net_id)
9971 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9973 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9974 logical_id=new_alone_id,
9975 children=dev.children,
9978 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9979 _GetInstanceInfoText(self.instance), False)
9980 except errors.GenericError:
9981 self.cfg.ReleaseDRBDMinors(self.instance.name)
9984 # We have new devices, shutdown the drbd on the old secondary
9985 for idx, dev in enumerate(self.instance.disks):
9986 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9987 self.cfg.SetDiskID(dev, self.target_node)
9988 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9990 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9991 "node: %s" % (idx, msg),
9992 hint=("Please cleanup this device manually as"
9993 " soon as possible"))
9995 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9996 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
9997 self.instance.disks)[pnode]
9999 msg = result.fail_msg
10001 # detaches didn't succeed (unlikely)
10002 self.cfg.ReleaseDRBDMinors(self.instance.name)
10003 raise errors.OpExecError("Can't detach the disks from the network on"
10004 " old node: %s" % (msg,))
10006 # if we managed to detach at least one, we update all the disks of
10007 # the instance to point to the new secondary
10008 self.lu.LogInfo("Updating instance configuration")
10009 for dev, _, new_logical_id in iv_names.itervalues():
10010 dev.logical_id = new_logical_id
10011 self.cfg.SetDiskID(dev, self.instance.primary_node)
10013 self.cfg.Update(self.instance, feedback_fn)
10015 # and now perform the drbd attach
10016 self.lu.LogInfo("Attaching primary drbds to new secondary"
10017 " (standalone => connected)")
10018 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10020 self.node_secondary_ip,
10021 self.instance.disks,
10022 self.instance.name,
10024 for to_node, to_result in result.items():
10025 msg = to_result.fail_msg
10027 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10029 hint=("please do a gnt-instance info to see the"
10030 " status of disks"))
10032 if self.early_release:
10033 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10035 self._RemoveOldStorage(self.target_node, iv_names)
10036 # WARNING: we release all node locks here, do not do other RPCs
10037 # than WaitForSync to the primary node
10038 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10039 names=[self.instance.primary_node,
10044 # This can fail as the old devices are degraded and _WaitForSync
10045 # does a combined result over all disks, so we don't check its return value
10046 self.lu.LogStep(cstep, steps_total, "Sync devices")
10048 _WaitForSync(self.lu, self.instance)
10050 # Check all devices manually
10051 self._CheckDevices(self.instance.primary_node, iv_names)
10053 # Step: remove old storage
10054 if not self.early_release:
10055 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10056 self._RemoveOldStorage(self.target_node, iv_names)
10059 class LURepairNodeStorage(NoHooksLU):
10060 """Repairs the volume group on a node.
10065 def CheckArguments(self):
10066 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10068 storage_type = self.op.storage_type
10070 if (constants.SO_FIX_CONSISTENCY not in
10071 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10072 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10073 " repaired" % storage_type,
10074 errors.ECODE_INVAL)
10076 def ExpandNames(self):
10077 self.needed_locks = {
10078 locking.LEVEL_NODE: [self.op.node_name],
10081 def _CheckFaultyDisks(self, instance, node_name):
10082 """Ensure faulty disks abort the opcode or at least warn."""
10084 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10086 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10087 " node '%s'" % (instance.name, node_name),
10088 errors.ECODE_STATE)
10089 except errors.OpPrereqError, err:
10090 if self.op.ignore_consistency:
10091 self.proc.LogWarning(str(err.args[0]))
10095 def CheckPrereq(self):
10096 """Check prerequisites.
10099 # Check whether any instance on this node has faulty disks
10100 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10101 if not inst.admin_up:
continue
10103 check_nodes = set(inst.all_nodes)
10104 check_nodes.discard(self.op.node_name)
10105 for inst_node_name in check_nodes:
10106 self._CheckFaultyDisks(inst, inst_node_name)
10108 def Exec(self, feedback_fn):
10109 feedback_fn("Repairing storage unit '%s' on %s ..." %
10110 (self.op.name, self.op.node_name))
10112 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10113 result = self.rpc.call_storage_execute(self.op.node_name,
10114 self.op.storage_type, st_args,
10116 constants.SO_FIX_CONSISTENCY)
10117 result.Raise("Failed to repair storage unit '%s' on %s" %
10118 (self.op.name, self.op.node_name))
10121 class LUNodeEvacuate(NoHooksLU):
10122 """Evacuates instances off a list of nodes.
10127 def CheckArguments(self):
10128 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10130 def ExpandNames(self):
10131 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10133 if self.op.remote_node is not None:
10134 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10135 assert self.op.remote_node
10137 if self.op.remote_node == self.op.node_name:
10138 raise errors.OpPrereqError("Can not use evacuated node as a new"
10139 " secondary node", errors.ECODE_INVAL)
10141 if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10142 raise errors.OpPrereqError("Without the use of an iallocator only"
10143 " secondary instances can be evacuated",
10144 errors.ECODE_INVAL)
10147 self.share_locks = _ShareAll()
10148 self.needed_locks = {
10149 locking.LEVEL_INSTANCE: [],
10150 locking.LEVEL_NODEGROUP: [],
10151 locking.LEVEL_NODE: [],
10154 if self.op.remote_node is None:
10155 # Iallocator will choose any node(s) in the same group
10156 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10158 group_nodes = frozenset([self.op.remote_node])
10160 # Determine nodes to be locked
10161 self.lock_nodes = set([self.op.node_name]) | group_nodes
10163 def _DetermineInstances(self):
10164 """Builds list of instances to operate on.
10167 assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10169 if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10170 # Primary instances only
10171 inst_fn = _GetNodePrimaryInstances
10172 assert self.op.remote_node is None, \
10173 "Evacuating primary instances requires iallocator"
10174 elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10175 # Secondary instances only
10176 inst_fn = _GetNodeSecondaryInstances
10179 assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10180 inst_fn = _GetNodeInstances
10182 return inst_fn(self.cfg, self.op.node_name)
10184 def DeclareLocks(self, level):
10185 if level == locking.LEVEL_INSTANCE:
10186 # Lock instances optimistically, needs verification once node and group
10187 # locks have been acquired
10188 self.needed_locks[locking.LEVEL_INSTANCE] = \
10189 set(i.name for i in self._DetermineInstances())
10191 elif level == locking.LEVEL_NODEGROUP:
10192 # Lock node groups optimistically, needs verification once nodes have
10194 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10195 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10197 elif level == locking.LEVEL_NODE:
10198 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10200 def CheckPrereq(self):
10202 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10203 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10204 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10206 assert owned_nodes == self.lock_nodes
10208 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10209 if owned_groups != wanted_groups:
10210 raise errors.OpExecError("Node groups changed since locks were acquired,"
10211 " current groups are '%s', used to be '%s'" %
10212 (utils.CommaJoin(wanted_groups),
10213 utils.CommaJoin(owned_groups)))
10215 # Determine affected instances
10216 self.instances = self._DetermineInstances()
10217 self.instance_names = [i.name for i in self.instances]
10219 if set(self.instance_names) != owned_instances:
10220 raise errors.OpExecError("Instances on node '%s' changed since locks"
10221 " were acquired, current instances are '%s',"
10222 " used to be '%s'" %
10223 (self.op.node_name,
10224 utils.CommaJoin(self.instance_names),
10225 utils.CommaJoin(owned_instances)))
10227 if self.instance_names:
10228 self.LogInfo("Evacuating instances from node '%s': %s",
10230 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10232 self.LogInfo("No instances to evacuate from node '%s'",
10235 if self.op.remote_node is not None:
10236 for i in self.instances:
10237 if i.primary_node == self.op.remote_node:
10238 raise errors.OpPrereqError("Node %s is the primary node of"
10239 " instance %s, cannot use it as"
10241 (self.op.remote_node, i.name),
10242 errors.ECODE_INVAL)
10244 def Exec(self, feedback_fn):
10245 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10247 if not self.instance_names:
10248 # No instances to evacuate
10251 elif self.op.iallocator is not None:
10252 # TODO: Implement relocation to other group
10253 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10254 evac_mode=self.op.mode,
10255 instances=list(self.instance_names))
10257 ial.Run(self.op.iallocator)
10259 if not ial.success:
10260 raise errors.OpPrereqError("Can't compute node evacuation using"
10261 " iallocator '%s': %s" %
10262 (self.op.iallocator, ial.info),
10263 errors.ECODE_NORES)
10265 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10267 elif self.op.remote_node is not None:
10268 assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10270 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10271 remote_node=self.op.remote_node,
10273 mode=constants.REPLACE_DISK_CHG,
10274 early_release=self.op.early_release)]
10275 for instance_name in self.instance_names
10279 raise errors.ProgrammerError("No iallocator or remote node")
10281 return ResultWithJobs(jobs)
10284 def _SetOpEarlyRelease(early_release, op):
10285 """Sets C{early_release} flag on opcodes if available.
10289 op.early_release = early_release
10290 except AttributeError:
10291 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10296 def _NodeEvacDest(use_nodes, group, nodes):
10297 """Returns group or nodes depending on caller's choice.
10301 return utils.CommaJoin(nodes)
10306 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10307 """Unpacks the result of change-group and node-evacuate iallocator requests.
10309 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10310 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10312 @type lu: L{LogicalUnit}
10313 @param lu: Logical unit instance
10314 @type alloc_result: tuple/list
10315 @param alloc_result: Result from iallocator
10316 @type early_release: bool
10317 @param early_release: Whether to release locks early if possible
10318 @type use_nodes: bool
10319 @param use_nodes: Whether to display node names instead of groups
"""
10322 (moved, failed, jobs) = alloc_result

if failed:
10325 lu.LogWarning("Unable to evacuate instances %s",
10326 utils.CommaJoin("%s (%s)" % (name, reason)
10327 for (name, reason) in failed))

if moved:
10330 lu.LogInfo("Instances to be moved: %s",
10331 utils.CommaJoin("%s (to %s)" %
10332 (name, _NodeEvacDest(use_nodes, group, nodes))
10333 for (name, group, nodes) in moved))
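# The iallocator returns each job as a list of serialized opcodes; they are
# re-instantiated via opcodes.OpCode.LoadOpCode below and the caller's
# early_release preference is applied to every opcode that supports it.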
10335 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10336 map(opcodes.OpCode.LoadOpCode, ops))
for ops in jobs]
10340 class LUInstanceGrowDisk(LogicalUnit):
10341 """Grow a disk of an instance.
10344 HPATH = "disk-grow"
10345 HTYPE = constants.HTYPE_INSTANCE
10348 def ExpandNames(self):
10349 self._ExpandAndLockInstance()
10350 self.needed_locks[locking.LEVEL_NODE] = []
10351 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10353 def DeclareLocks(self, level):
10354 if level == locking.LEVEL_NODE:
10355 self._LockInstancesNodes()
10357 def BuildHooksEnv(self):
10358 """Build hooks env.
10360 This runs on the master, the primary and all the secondaries.

"""
env = {
10364 "DISK": self.op.disk,
10365 "AMOUNT": self.op.amount,
}
10367 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
return env
10370 def BuildHooksNodes(self):
10371 """Build hooks nodes.
10374 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10377 def CheckPrereq(self):
10378 """Check prerequisites.
10380 This checks that the instance is in the cluster.
10383 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10384 assert instance is not None, \
10385 "Cannot retrieve locked instance %s" % self.op.instance_name
10386 nodenames = list(instance.all_nodes)
10387 for node in nodenames:
10388 _CheckNodeOnline(self, node)
10390 self.instance = instance
10392 if instance.disk_template not in constants.DTS_GROWABLE:
10393 raise errors.OpPrereqError("Instance's disk layout does not support"
10394 " growing", errors.ECODE_INVAL)
10396 self.disk = instance.FindDisk(self.op.disk)
10398 if instance.disk_template not in (constants.DT_FILE,
10399 constants.DT_SHARED_FILE):
10400 # TODO: check the free disk space for file, when that feature will be
10402 _CheckNodesFreeDiskPerVG(self, nodenames,
10403 self.disk.ComputeGrowth(self.op.amount))
10405 def Exec(self, feedback_fn):
10406 """Execute disk grow.
10409 instance = self.instance
10412 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10414 raise errors.OpExecError("Cannot activate block device to grow")
10416 # First run all grow ops in dry-run mode
10417 for node in instance.all_nodes:
10418 self.cfg.SetDiskID(disk, node)
10419 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10420 result.Raise("Grow request failed to node %s" % node)
10422 # We know that (as far as we can test) operations across different
10423 # nodes will succeed, time to run it for real
10424 for node in instance.all_nodes:
10425 self.cfg.SetDiskID(disk, node)
10426 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10427 result.Raise("Grow request failed to node %s" % node)
10429 # TODO: Rewrite code to work properly
10430 # DRBD goes into sync mode for a short amount of time after executing the
10431 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10432 # calling "resize" in sync mode fails. Sleeping for a short amount of
10433 # time is a work-around.
time.sleep(5)
10436 disk.RecordGrow(self.op.amount)
10437 self.cfg.Update(instance, feedback_fn)
10438 if self.op.wait_for_sync:
10439 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10441 self.proc.LogWarning("Disk sync-ing has not returned a good"
10442 " status; please check the instance")
10443 if not instance.admin_up:
10444 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10445 elif not instance.admin_up:
10446 self.proc.LogWarning("Not shutting down the disk even if the instance is"
10447 " not supposed to be running because no wait for"
10448 " sync mode was requested")
10451 class LUInstanceQueryData(NoHooksLU):
10452 """Query runtime instance data.
10457 def ExpandNames(self):
10458 self.needed_locks = {}
10460 # Use locking if requested or when non-static information is wanted
10461 if not (self.op.static or self.op.use_locking):
10462 self.LogWarning("Non-static data requested, locks need to be acquired")
10463 self.op.use_locking = True
10465 if self.op.instances or not self.op.use_locking:
10466 # Expand instance names right here
10467 self.wanted_names = _GetWantedInstances(self, self.op.instances)
10469 # Will use acquired locks
10470 self.wanted_names = None
10472 if self.op.use_locking:
10473 self.share_locks = _ShareAll()
10475 if self.wanted_names is None:
10476 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10478 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10480 self.needed_locks[locking.LEVEL_NODE] = []
10481 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10483 def DeclareLocks(self, level):
10484 if self.op.use_locking and level == locking.LEVEL_NODE:
10485 self._LockInstancesNodes()
10487 def CheckPrereq(self):
10488 """Check prerequisites.
10490 This only checks the optional instance list against the existing names.
10493 if self.wanted_names is None:
10494 assert self.op.use_locking, "Locking was not used"
10495 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10497 self.wanted_instances = \
10498 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10500 def _ComputeBlockdevStatus(self, node, instance_name, dev):
10501 """Returns the status of a block device
10504 if self.op.static or not node:
10507 self.cfg.SetDiskID(dev, node)
10509 result = self.rpc.call_blockdev_find(node, dev)
10513 result.Raise("Can't compute disk status for %s" % instance_name)
10515 status = result.payload
10519 return (status.dev_path, status.major, status.minor,
10520 status.sync_percent, status.estimated_time,
10521 status.is_degraded, status.ldisk_status)
10523 def _ComputeDiskStatus(self, instance, snode, dev):
10524 """Compute block device status.
10527 if dev.dev_type in constants.LDS_DRBD:
10528 # we change the snode then (otherwise we use the one passed in)
10529 if dev.logical_id[0] == instance.primary_node:
10530 snode = dev.logical_id[1]
10532 snode = dev.logical_id[0]
10534 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10535 instance.name, dev)
10536 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10539 dev_children = map(compat.partial(self._ComputeDiskStatus,
10546 "iv_name": dev.iv_name,
10547 "dev_type": dev.dev_type,
10548 "logical_id": dev.logical_id,
10549 "physical_id": dev.physical_id,
10550 "pstatus": dev_pstatus,
10551 "sstatus": dev_sstatus,
10552 "children": dev_children,
10557 def Exec(self, feedback_fn):
10558 """Gather and return data"""
10561 cluster = self.cfg.GetClusterInfo()
10563 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10564 for i in self.wanted_instances)
10565 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10566 if self.op.static or pnode.offline:
10567 remote_state = None
10569 self.LogWarning("Primary node %s is marked offline, returning static"
10570 " information only for instance %s" %
10571 (pnode.name, instance.name))
10573 remote_info = self.rpc.call_instance_info(instance.primary_node,
10575 instance.hypervisor)
10576 remote_info.Raise("Error checking node %s" % instance.primary_node)
10577 remote_info = remote_info.payload
10578 if remote_info and "state" in remote_info:
10579 remote_state = "up"
10581 remote_state = "down"
10583 if instance.admin_up:
10584 config_state = "up"
10586 config_state = "down"
10588 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10591 result[instance.name] = {
10592 "name": instance.name,
10593 "config_state": config_state,
10594 "run_state": remote_state,
10595 "pnode": instance.primary_node,
10596 "snodes": instance.secondary_nodes,
10598 # this happens to be the same format used for hooks
10599 "nics": _NICListToTuple(self, instance.nics),
10600 "disk_template": instance.disk_template,
10602 "hypervisor": instance.hypervisor,
10603 "network_port": instance.network_port,
10604 "hv_instance": instance.hvparams,
10605 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10606 "be_instance": instance.beparams,
10607 "be_actual": cluster.FillBE(instance),
10608 "os_instance": instance.osparams,
10609 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10610 "serial_no": instance.serial_no,
10611 "mtime": instance.mtime,
10612 "ctime": instance.ctime,
10613 "uuid": instance.uuid,
10619 class LUInstanceSetParams(LogicalUnit):
10620 """Modifies an instances's parameters.
10623 HPATH = "instance-modify"
10624 HTYPE = constants.HTYPE_INSTANCE
10627 def CheckArguments(self):
10628 if not (self.op.nics or self.op.disks or self.op.disk_template or
10629 self.op.hvparams or self.op.beparams or self.op.os_name):
10630 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10632 if self.op.hvparams:
10633 _CheckGlobalHvParams(self.op.hvparams)
10637 for disk_op, disk_dict in self.op.disks:
10638 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10639 if disk_op == constants.DDM_REMOVE:
10640 disk_addremove += 1
10642 elif disk_op == constants.DDM_ADD:
10643 disk_addremove += 1
10645 if not isinstance(disk_op, int):
10646 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10647 if not isinstance(disk_dict, dict):
10648 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10649 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10651 if disk_op == constants.DDM_ADD:
10652 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10653 if mode not in constants.DISK_ACCESS_SET:
10654 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10655 errors.ECODE_INVAL)
10656 size = disk_dict.get(constants.IDISK_SIZE, None)
10658 raise errors.OpPrereqError("Required disk parameter size missing",
10659 errors.ECODE_INVAL)
10662 except (TypeError, ValueError), err:
10663 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10664 str(err), errors.ECODE_INVAL)
10665 disk_dict[constants.IDISK_SIZE] = size
10667 # modification of disk
10668 if constants.IDISK_SIZE in disk_dict:
10669 raise errors.OpPrereqError("Disk size change not possible, use"
10670 " grow-disk", errors.ECODE_INVAL)
10672 if disk_addremove > 1:
10673 raise errors.OpPrereqError("Only one disk add or remove operation"
10674 " supported at a time", errors.ECODE_INVAL)
10676 if self.op.disks and self.op.disk_template is not None:
10677 raise errors.OpPrereqError("Disk template conversion and other disk"
10678 " changes not supported at the same time",
10679 errors.ECODE_INVAL)
10681 if (self.op.disk_template and
10682 self.op.disk_template in constants.DTS_INT_MIRROR and
10683 self.op.remote_node is None):
10684 raise errors.OpPrereqError("Changing the disk template to a mirrored"
10685 " one requires specifying a secondary node",
10686 errors.ECODE_INVAL)
10690 for nic_op, nic_dict in self.op.nics:
10691 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10692 if nic_op == constants.DDM_REMOVE:
10695 elif nic_op == constants.DDM_ADD:
10698 if not isinstance(nic_op, int):
10699 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10700 if not isinstance(nic_dict, dict):
10701 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10702 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10704 # nic_dict should be a dict
10705 nic_ip = nic_dict.get(constants.INIC_IP, None)
10706 if nic_ip is not None:
10707 if nic_ip.lower() == constants.VALUE_NONE:
10708 nic_dict[constants.INIC_IP] = None
10710 if not netutils.IPAddress.IsValid(nic_ip):
10711 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10712 errors.ECODE_INVAL)
10714 nic_bridge = nic_dict.get("bridge", None)
10715 nic_link = nic_dict.get(constants.INIC_LINK, None)
10716 if nic_bridge and nic_link:
10717 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10718 " at the same time", errors.ECODE_INVAL)
10719 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10720 nic_dict["bridge"] = None
10721 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10722 nic_dict[constants.INIC_LINK] = None
10724 if nic_op == constants.DDM_ADD:
10725 nic_mac = nic_dict.get(constants.INIC_MAC, None)
10726 if nic_mac is None:
10727 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10729 if constants.INIC_MAC in nic_dict:
10730 nic_mac = nic_dict[constants.INIC_MAC]
10731 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10732 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10734 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10735 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10736 " modifying an existing nic",
10737 errors.ECODE_INVAL)
10739 if nic_addremove > 1:
10740 raise errors.OpPrereqError("Only one NIC add or remove operation"
10741 " supported at a time", errors.ECODE_INVAL)
10743 def ExpandNames(self):
10744 self._ExpandAndLockInstance()
10745 self.needed_locks[locking.LEVEL_NODE] = []
10746 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10748 def DeclareLocks(self, level):
10749 if level == locking.LEVEL_NODE:
10750 self._LockInstancesNodes()
10751 if self.op.disk_template and self.op.remote_node:
10752 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10753 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10755 def BuildHooksEnv(self):
10756 """Build hooks env.
10758 This runs on the master, primary and secondaries.
10762 if constants.BE_MEMORY in self.be_new:
10763 args["memory"] = self.be_new[constants.BE_MEMORY]
10764 if constants.BE_VCPUS in self.be_new:
10765 args["vcpus"] = self.be_new[constants.BE_VCPUS]
10766 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10767 # information at all.
10770 nic_override = dict(self.op.nics)
10771 for idx, nic in enumerate(self.instance.nics):
10772 if idx in nic_override:
10773 this_nic_override = nic_override[idx]
10775 this_nic_override = {}
10776 if constants.INIC_IP in this_nic_override:
10777 ip = this_nic_override[constants.INIC_IP]
10780 if constants.INIC_MAC in this_nic_override:
10781 mac = this_nic_override[constants.INIC_MAC]
10784 if idx in self.nic_pnew:
10785 nicparams = self.nic_pnew[idx]
10787 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10788 mode = nicparams[constants.NIC_MODE]
10789 link = nicparams[constants.NIC_LINK]
10790 args["nics"].append((ip, mac, mode, link))
10791 if constants.DDM_ADD in nic_override:
10792 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10793 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10794 nicparams = self.nic_pnew[constants.DDM_ADD]
10795 mode = nicparams[constants.NIC_MODE]
10796 link = nicparams[constants.NIC_LINK]
10797 args["nics"].append((ip, mac, mode, link))
10798 elif constants.DDM_REMOVE in nic_override:
10799 del args["nics"][-1]
10801 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10802 if self.op.disk_template:
10803 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10807 def BuildHooksNodes(self):
10808 """Build hooks nodes.
10811 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10814 def CheckPrereq(self):
10815 """Check prerequisites.
10817 This only checks the instance list against the existing names.
10820 # checking the new params on the primary/secondary nodes
10822 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10823 cluster = self.cluster = self.cfg.GetClusterInfo()
10824 assert self.instance is not None, \
10825 "Cannot retrieve locked instance %s" % self.op.instance_name
10826 pnode = instance.primary_node
10827 nodelist = list(instance.all_nodes)
10830 if self.op.os_name and not self.op.force:
10831 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10832 self.op.force_variant)
10833 instance_os = self.op.os_name
10835 instance_os = instance.os
10837 if self.op.disk_template:
10838 if instance.disk_template == self.op.disk_template:
10839 raise errors.OpPrereqError("Instance already has disk template %s" %
10840 instance.disk_template, errors.ECODE_INVAL)
10842 if (instance.disk_template,
10843 self.op.disk_template) not in self._DISK_CONVERSIONS:
10844 raise errors.OpPrereqError("Unsupported disk template conversion from"
10845 " %s to %s" % (instance.disk_template,
10846 self.op.disk_template),
10847 errors.ECODE_INVAL)
10848 _CheckInstanceDown(self, instance, "cannot change disk template")
10849 if self.op.disk_template in constants.DTS_INT_MIRROR:
10850 if self.op.remote_node == pnode:
10851 raise errors.OpPrereqError("Given new secondary node %s is the same"
10852 " as the primary node of the instance" %
10853 self.op.remote_node, errors.ECODE_STATE)
10854 _CheckNodeOnline(self, self.op.remote_node)
10855 _CheckNodeNotDrained(self, self.op.remote_node)
10856 # FIXME: here we assume that the old instance type is DT_PLAIN
10857 assert instance.disk_template == constants.DT_PLAIN
10858 disks = [{constants.IDISK_SIZE: d.size,
10859 constants.IDISK_VG: d.logical_id[0]}
10860 for d in instance.disks]
10861 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10862 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10864 # hvparams processing
10865 if self.op.hvparams:
10866 hv_type = instance.hypervisor
10867 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10868 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10869 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10872 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10873 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10874 self.hv_new = hv_new # the new actual values
10875 self.hv_inst = i_hvdict # the new dict (without defaults)
10877 self.hv_new = self.hv_inst = {}
10879 # beparams processing
10880 if self.op.beparams:
10881 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10883 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10884 be_new = cluster.SimpleFillBE(i_bedict)
10885 self.be_new = be_new # the new actual values
10886 self.be_inst = i_bedict # the new dict (without defaults)
10888 self.be_new = self.be_inst = {}
10889 be_old = cluster.FillBE(instance)
10891 # osparams processing
10892 if self.op.osparams:
10893 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10894 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10895 self.os_inst = i_osdict # the new dict (without defaults)
10901 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10902 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10903 mem_check_list = [pnode]
10904 if be_new[constants.BE_AUTO_BALANCE]:
10905 # either we changed auto_balance to yes or it was from before
10906 mem_check_list.extend(instance.secondary_nodes)
10907 instance_info = self.rpc.call_instance_info(pnode, instance.name,
10908 instance.hypervisor)
10909 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10910 instance.hypervisor)
10911 pninfo = nodeinfo[pnode]
10912 msg = pninfo.fail_msg
10914 # Assume the primary node is unreachable and go ahead
10915 self.warn.append("Can't get info from primary node %s: %s" %
10917 elif not isinstance(pninfo.payload.get("memory_free", None), int):
10918 self.warn.append("Node data from primary node %s doesn't contain"
10919 " free memory information" % pnode)
10920 elif instance_info.fail_msg:
10921 self.warn.append("Can't get instance runtime information: %s" %
10922 instance_info.fail_msg)
10924 if instance_info.payload:
10925 current_mem = int(instance_info.payload["memory"])
10927 # Assume instance not running
10928 # (there is a slight race condition here, but it's not very probable,
10929 # and we have no other way to check)
10931 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10932 pninfo.payload["memory_free"])
10934 raise errors.OpPrereqError("This change will prevent the instance"
10935 " from starting, due to %d MB of memory"
10936 " missing on its primary node" % miss_mem,
10937 errors.ECODE_NORES)
10939 if be_new[constants.BE_AUTO_BALANCE]:
10940 for node, nres in nodeinfo.items():
10941 if node not in instance.secondary_nodes:
10943 nres.Raise("Can't get info from secondary node %s" % node,
10944 prereq=True, ecode=errors.ECODE_STATE)
10945 if not isinstance(nres.payload.get("memory_free", None), int):
10946 raise errors.OpPrereqError("Secondary node %s didn't return free"
10947 " memory information" % node,
10948 errors.ECODE_STATE)
10949 elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10950 raise errors.OpPrereqError("This change will prevent the instance"
10951 " from failover to its secondary node"
10952 " %s, due to not enough memory" % node,
10953 errors.ECODE_STATE)
10957 self.nic_pinst = {}
10958 for nic_op, nic_dict in self.op.nics:
10959 if nic_op == constants.DDM_REMOVE:
10960 if not instance.nics:
10961 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10962 errors.ECODE_INVAL)
10964 if nic_op != constants.DDM_ADD:
10966 if not instance.nics:
10967 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10968 " no NICs" % nic_op,
10969 errors.ECODE_INVAL)
10970 if nic_op < 0 or nic_op >= len(instance.nics):
10971 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10973 (nic_op, len(instance.nics) - 1),
10974 errors.ECODE_INVAL)
10975 old_nic_params = instance.nics[nic_op].nicparams
10976 old_nic_ip = instance.nics[nic_op].ip
10978 old_nic_params = {}
10981 update_params_dict = dict([(key, nic_dict[key])
10982 for key in constants.NICS_PARAMETERS
10983 if key in nic_dict])
10985 if "bridge" in nic_dict:
10986 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10988 new_nic_params = _GetUpdatedParams(old_nic_params,
10989 update_params_dict)
10990 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10991 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10992 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10993 self.nic_pinst[nic_op] = new_nic_params
10994 self.nic_pnew[nic_op] = new_filled_nic_params
10995 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10997 if new_nic_mode == constants.NIC_MODE_BRIDGED:
10998 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10999 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11001 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11003 self.warn.append(msg)
11005 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11006 if new_nic_mode == constants.NIC_MODE_ROUTED:
11007 if constants.INIC_IP in nic_dict:
11008 nic_ip = nic_dict[constants.INIC_IP]
11010 nic_ip = old_nic_ip
11012 raise errors.OpPrereqError("Cannot set the nic ip to None"
11013 " on a routed nic", errors.ECODE_INVAL)
11014 if constants.INIC_MAC in nic_dict:
11015 nic_mac = nic_dict[constants.INIC_MAC]
11016 if nic_mac is None:
11017 raise errors.OpPrereqError("Cannot set the nic mac to None",
11018 errors.ECODE_INVAL)
11019 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11020 # otherwise generate the mac
11021 nic_dict[constants.INIC_MAC] = \
11022 self.cfg.GenerateMAC(self.proc.GetECId())
11024 # or validate/reserve the current one
11026 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11027 except errors.ReservationError:
11028 raise errors.OpPrereqError("MAC address %s already in use"
11029 " in cluster" % nic_mac,
11030 errors.ECODE_NOTUNIQUE)
11033 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11034 raise errors.OpPrereqError("Disk operations not supported for"
11035 " diskless instances",
11036 errors.ECODE_INVAL)
11037 for disk_op, _ in self.op.disks:
11038 if disk_op == constants.DDM_REMOVE:
11039 if len(instance.disks) == 1:
11040 raise errors.OpPrereqError("Cannot remove the last disk of"
11041 " an instance", errors.ECODE_INVAL)
11042 _CheckInstanceDown(self, instance, "cannot remove disks")
11044 if (disk_op == constants.DDM_ADD and
11045 len(instance.disks) >= constants.MAX_DISKS):
11046 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11047 " add more" % constants.MAX_DISKS,
11048 errors.ECODE_STATE)
11049 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11051 if disk_op < 0 or disk_op >= len(instance.disks):
11052 raise errors.OpPrereqError("Invalid disk index %s, valid values"
11054 (disk_op, len(instance.disks)),
11055 errors.ECODE_INVAL)
11059 def _ConvertPlainToDrbd(self, feedback_fn):
11060 """Converts an instance from plain to drbd.
11063 feedback_fn("Converting template to drbd")
11064 instance = self.instance
11065 pnode = instance.primary_node
11066 snode = self.op.remote_node
11068 # create a fake disk info for _GenerateDiskTemplate
11069 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11070 constants.IDISK_VG: d.logical_id[0]}
11071 for d in instance.disks]
11072 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11073 instance.name, pnode, [snode],
11074 disk_info, None, None, 0, feedback_fn)
11075 info = _GetInstanceInfoText(instance)
11076 feedback_fn("Creating additional volumes...")
11077 # first, create the missing data and meta devices
11078 for disk in new_disks:
11079 # unfortunately this is... not too nice
11080 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11081 info, True)
11082 for child in disk.children:
11083 _CreateSingleBlockDev(self, snode, instance, child, info, True)
11084 # at this stage, all new LVs have been created, we can rename the
11085 # old ones
11086 feedback_fn("Renaming original volumes...")
11087 rename_list = [(o, n.children[0].logical_id)
11088 for (o, n) in zip(instance.disks, new_disks)]
11089 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11090 result.Raise("Failed to rename original LVs")
11092 feedback_fn("Initializing DRBD devices...")
11093 # all child devices are in place, we can now create the DRBD devices
11094 for disk in new_disks:
11095 for node in [pnode, snode]:
11096 f_create = node == pnode
11097 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11099 # at this point, the instance has been modified
11100 instance.disk_template = constants.DT_DRBD8
11101 instance.disks = new_disks
11102 self.cfg.Update(instance, feedback_fn)
11104 # disks are created, waiting for sync
11105 disk_abort = not _WaitForSync(self, instance,
11106 oneshot=not self.op.wait_for_sync)
11107 if disk_abort:
11108 raise errors.OpExecError("There are some degraded disks for"
11109 " this instance, please cleanup manually")
11111 def _ConvertDrbdToPlain(self, feedback_fn):
11112 """Converts an instance from drbd to plain.
11115 instance = self.instance
11116 assert len(instance.secondary_nodes) == 1
11117 pnode = instance.primary_node
11118 snode = instance.secondary_nodes[0]
11119 feedback_fn("Converting template to plain")
11121 old_disks = instance.disks
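# each DRBD8 disk object has two children, the data LV and the metadata LV;
# keeping only children[0] reduces every disk to its underlying plain data
# volume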
11122 new_disks = [d.children[0] for d in old_disks]
11124 # copy over size and mode
11125 for parent, child in zip(old_disks, new_disks):
11126 child.size = parent.size
11127 child.mode = parent.mode
11129 # update instance structure
11130 instance.disks = new_disks
11131 instance.disk_template = constants.DT_PLAIN
11132 self.cfg.Update(instance, feedback_fn)
11134 feedback_fn("Removing volumes on the secondary node...")
11135 for disk in old_disks:
11136 self.cfg.SetDiskID(disk, snode)
11137 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11138 if msg:
11139 self.LogWarning("Could not remove block device %s on node %s,"
11140 " continuing anyway: %s", disk.iv_name, snode, msg)
11142 feedback_fn("Removing unneeded volumes on the primary node...")
11143 for idx, disk in enumerate(old_disks):
11144 meta = disk.children[1]
11145 self.cfg.SetDiskID(meta, pnode)
11146 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11147 if msg:
11148 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11149 " continuing anyway: %s", idx, pnode, msg)
11151 def Exec(self, feedback_fn):
11152 """Modifies an instance.
11154 All parameters take effect only at the next restart of the instance.
11157 # Process here the warnings from CheckPrereq, as we don't have a
11158 # feedback_fn there.
11159 for warn in self.warn:
11160 feedback_fn("WARNING: %s" % warn)
11163 instance = self.instance
11165 for disk_op, disk_dict in self.op.disks:
11166 if disk_op == constants.DDM_REMOVE:
11167 # remove the last disk
11168 device = instance.disks.pop()
11169 device_idx = len(instance.disks)
11170 for node, disk in device.ComputeNodeTree(instance.primary_node):
11171 self.cfg.SetDiskID(disk, node)
11172 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11173 if msg:
11174 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11175 " continuing anyway", device_idx, node, msg)
11176 result.append(("disk/%d" % device_idx, "remove"))
11177 elif disk_op == constants.DDM_ADD:
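# for file-based templates the new disk must use the same driver and live in
# the same directory as the existing disks, both taken from the first disk's
# logical_id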
11179 if instance.disk_template in (constants.DT_FILE,
11180 constants.DT_SHARED_FILE):
11181 file_driver, file_path = instance.disks[0].logical_id
11182 file_path = os.path.dirname(file_path)
11184 file_driver = file_path = None
11185 disk_idx_base = len(instance.disks)
11186 new_disk = _GenerateDiskTemplate(self,
11187 instance.disk_template,
11188 instance.name, instance.primary_node,
11189 instance.secondary_nodes,
11193 disk_idx_base, feedback_fn)[0]
11194 instance.disks.append(new_disk)
11195 info = _GetInstanceInfoText(instance)
11197 logging.info("Creating volume %s for instance %s",
11198 new_disk.iv_name, instance.name)
11199 # Note: this needs to be kept in sync with _CreateDisks
11201 for node in instance.all_nodes:
11202 f_create = node == instance.primary_node
11203 try:
11204 _CreateBlockDev(self, node, instance, new_disk,
11205 f_create, info, f_create)
11206 except errors.OpExecError, err:
11207 self.LogWarning("Failed to create volume %s (%s) on"
11208 " node %s: %s",
11209 new_disk.iv_name, new_disk, node, err)
11210 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11211 (new_disk.size, new_disk.mode)))
11213 # change a given disk
11214 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11215 result.append(("disk.mode/%d" % disk_op,
11216 disk_dict[constants.IDISK_MODE]))
11218 if self.op.disk_template:
11219 r_shut = _ShutdownInstanceDisks(self, instance)
11220 if not r_shut:
11221 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11222 " proceed with disk template conversion")
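# dispatch to the conversion helper registered in the _DISK_CONVERSIONS
# table (defined at the end of this class), keyed by the
# (current template, requested template) pair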
11223 mode = (instance.disk_template, self.op.disk_template)
11225 self._DISK_CONVERSIONS[mode](self, feedback_fn)
11227 self.cfg.ReleaseDRBDMinors(instance.name)
11229 result.append(("disk_template", self.op.disk_template))
11232 for nic_op, nic_dict in self.op.nics:
11233 if nic_op == constants.DDM_REMOVE:
11234 # remove the last nic
11235 del instance.nics[-1]
11236 result.append(("nic.%d" % len(instance.nics), "remove"))
11237 elif nic_op == constants.DDM_ADD:
11238 # mac and bridge should be set, by now
11239 mac = nic_dict[constants.INIC_MAC]
11240 ip = nic_dict.get(constants.INIC_IP, None)
11241 nicparams = self.nic_pinst[constants.DDM_ADD]
11242 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11243 instance.nics.append(new_nic)
11244 result.append(("nic.%d" % (len(instance.nics) - 1),
11245 "add:mac=%s,ip=%s,mode=%s,link=%s" %
11246 (new_nic.mac, new_nic.ip,
11247 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11248 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11251 for key in (constants.INIC_MAC, constants.INIC_IP):
11252 if key in nic_dict:
11253 setattr(instance.nics[nic_op], key, nic_dict[key])
11254 if nic_op in self.nic_pinst:
11255 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11256 for key, val in nic_dict.iteritems():
11257 result.append(("nic.%s/%d" % (key, nic_op), val))
11260 if self.op.hvparams:
11261 instance.hvparams = self.hv_inst
11262 for key, val in self.op.hvparams.iteritems():
11263 result.append(("hv/%s" % key, val))
11266 if self.op.beparams:
11267 instance.beparams = self.be_inst
11268 for key, val in self.op.beparams.iteritems():
11269 result.append(("be/%s" % key, val))
11272 if self.op.os_name:
11273 instance.os = self.op.os_name
11276 if self.op.osparams:
11277 instance.osparams = self.os_inst
11278 for key, val in self.op.osparams.iteritems():
11279 result.append(("os/%s" % key, val))
11281 self.cfg.Update(instance, feedback_fn)
11285 _DISK_CONVERSIONS = {
11286 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11287 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11291 class LUInstanceChangeGroup(LogicalUnit):
11292 HPATH = "instance-change-group"
11293 HTYPE = constants.HTYPE_INSTANCE
11296 def ExpandNames(self):
11297 self.share_locks = _ShareAll()
11298 self.needed_locks = {
11299 locking.LEVEL_NODEGROUP: [],
11300 locking.LEVEL_NODE: [],
11303 self._ExpandAndLockInstance()
11305 if self.op.target_groups:
11306 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11307 self.op.target_groups)
11309 self.req_target_uuids = None
11311 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11313 def DeclareLocks(self, level):
11314 if level == locking.LEVEL_NODEGROUP:
11315 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11317 if self.req_target_uuids:
11318 lock_groups = set(self.req_target_uuids)
11320 # Lock all groups used by instance optimistically; this requires going
11321 # via the node before it's locked, requiring verification later on
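# (CheckPrereq re-checks this: it asserts that the instance's nodes, and
# therefore its groups, did not change while the locks were being acquired)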
11322 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11323 lock_groups.update(instance_groups)
11325 # No target groups, need to lock all of them
11326 lock_groups = locking.ALL_SET
11328 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11330 elif level == locking.LEVEL_NODE:
11331 if self.req_target_uuids:
11332 # Lock all nodes used by instances
11333 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11334 self._LockInstancesNodes()
11336 # Lock all nodes in all potential target groups
11337 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11338 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11339 member_nodes = [node_name
11340 for group in lock_groups
11341 for node_name in self.cfg.GetNodeGroup(group).members]
11342 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11344 # Lock all nodes as all groups are potential targets
11345 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11347 def CheckPrereq(self):
11348 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11349 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11350 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11352 assert (self.req_target_uuids is None or
11353 owned_groups.issuperset(self.req_target_uuids))
11354 assert owned_instances == set([self.op.instance_name])
11356 # Get instance information
11357 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11359 # Check if node groups for locked instance are still correct
11360 assert owned_nodes.issuperset(self.instance.all_nodes), \
11361 ("Instance %s's nodes changed while we kept the lock" %
11362 self.op.instance_name)
11364 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11367 if self.req_target_uuids:
11368 # User requested specific target groups
11369 self.target_uuids = self.req_target_uuids
11371 # All groups except those used by the instance are potential targets
11372 self.target_uuids = owned_groups - inst_groups
11374 conflicting_groups = self.target_uuids & inst_groups
11375 if conflicting_groups:
11376 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11377 " used by the instance '%s'" %
11378 (utils.CommaJoin(conflicting_groups),
11379 self.op.instance_name),
11380 errors.ECODE_INVAL)
11382 if not self.target_uuids:
11383 raise errors.OpPrereqError("There are no possible target groups",
11384 errors.ECODE_INVAL)
11386 def BuildHooksEnv(self):
11387 """Build hooks env.
11390 assert self.target_uuids
11393 "TARGET_GROUPS": " ".join(self.target_uuids),
11396 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11400 def BuildHooksNodes(self):
11401 """Build hooks nodes.
11404 mn = self.cfg.GetMasterNode()
11405 return ([mn], [mn])
11407 def Exec(self, feedback_fn):
11408 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11410 assert instances == [self.op.instance_name], "Instance not locked"
11412 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11413 instances=instances, target_groups=list(self.target_uuids))
11415 ial.Run(self.op.iallocator)
11417 if not ial.success:
11418 raise errors.OpPrereqError("Can't compute solution for changing group of"
11419 " instance '%s' using iallocator '%s': %s" %
11420 (self.op.instance_name, self.op.iallocator,
11422 errors.ECODE_NORES)
11424 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11426 self.LogInfo("Iallocator returned %s job(s) for changing group of"
11427 " instance '%s'", len(jobs), self.op.instance_name)
11429 return ResultWithJobs(jobs)
11432 class LUBackupQuery(NoHooksLU):
11433 """Query the exports list
11438 def ExpandNames(self):
11439 self.needed_locks = {}
11440 self.share_locks[locking.LEVEL_NODE] = 1
11441 if not self.op.nodes:
11442 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11444 self.needed_locks[locking.LEVEL_NODE] = \
11445 _GetWantedNodes(self, self.op.nodes)
11447 def Exec(self, feedback_fn):
11448 """Compute the list of all the exported system images.
11451 @return: a dictionary with the structure node->(export-list)
11452 where export-list is a list of the instances exported on
11453 that node
11456 self.nodes = self.owned_locks(locking.LEVEL_NODE)
11457 rpcresult = self.rpc.call_export_list(self.nodes)
11458 result = {}
11459 for node in rpcresult:
11460 if rpcresult[node].fail_msg:
11461 result[node] = False
11462 else:
11463 result[node] = rpcresult[node].payload
11465 return result
11468 class LUBackupPrepare(NoHooksLU):
11469 """Prepares an instance for an export and returns useful information.
11474 def ExpandNames(self):
11475 self._ExpandAndLockInstance()
11477 def CheckPrereq(self):
11478 """Check prerequisites.
11481 instance_name = self.op.instance_name
11483 self.instance = self.cfg.GetInstanceInfo(instance_name)
11484 assert self.instance is not None, \
11485 "Cannot retrieve locked instance %s" % self.op.instance_name
11486 _CheckNodeOnline(self, self.instance.primary_node)
11488 self._cds = _GetClusterDomainSecret()
11490 def Exec(self, feedback_fn):
11491 """Prepares an instance for an export.
11494 instance = self.instance
11496 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11497 salt = utils.GenerateSecret(8)
11499 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11500 result = self.rpc.call_x509_cert_create(instance.primary_node,
11501 constants.RIE_CERT_VALIDITY)
11502 result.Raise("Can't create X509 key and certificate on %s" % result.node)
11504 (name, cert_pem) = result.payload
11506 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11510 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11511 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11513 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11519 class LUBackupExport(LogicalUnit):
11520 """Export an instance to an image in the cluster.
11523 HPATH = "instance-export"
11524 HTYPE = constants.HTYPE_INSTANCE
11527 def CheckArguments(self):
11528 """Check the arguments.
11531 self.x509_key_name = self.op.x509_key_name
11532 self.dest_x509_ca_pem = self.op.destination_x509_ca
11534 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11535 if not self.x509_key_name:
11536 raise errors.OpPrereqError("Missing X509 key name for encryption",
11537 errors.ECODE_INVAL)
11539 if not self.dest_x509_ca_pem:
11540 raise errors.OpPrereqError("Missing destination X509 CA",
11541 errors.ECODE_INVAL)
11543 def ExpandNames(self):
11544 self._ExpandAndLockInstance()
11546 # Lock all nodes for local exports
11547 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11548 # FIXME: lock only instance primary and destination node
11550 # Sad but true, for now we have to lock all nodes, as we don't know where
11551 # the previous export might be, and in this LU we search for it and
11552 # remove it from its current node. In the future we could fix this by:
11553 # - making a tasklet to search (share-lock all), then create the
11554 # new one, then one to remove, after
11555 # - removing the removal operation altogether
11556 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11558 def DeclareLocks(self, level):
11559 """Last minute lock declaration."""
11560 # All nodes are locked anyway, so nothing to do here.
11562 def BuildHooksEnv(self):
11563 """Build hooks env.
11565 This will run on the master, primary node and target node.
11569 "EXPORT_MODE": self.op.mode,
11570 "EXPORT_NODE": self.op.target_node,
11571 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11572 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11573 # TODO: Generic function for boolean env variables
11574 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11577 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11581 def BuildHooksNodes(self):
11582 """Build hooks nodes.
11585 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11587 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11588 nl.append(self.op.target_node)
11592 def CheckPrereq(self):
11593 """Check prerequisites.
11595 This checks that the instance and node names are valid.
11598 instance_name = self.op.instance_name
11600 self.instance = self.cfg.GetInstanceInfo(instance_name)
11601 assert self.instance is not None, \
11602 "Cannot retrieve locked instance %s" % self.op.instance_name
11603 _CheckNodeOnline(self, self.instance.primary_node)
11605 if (self.op.remove_instance and self.instance.admin_up and
11606 not self.op.shutdown):
11607 raise errors.OpPrereqError("Can not remove instance without shutting it"
11610 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11611 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11612 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11613 assert self.dst_node is not None
11615 _CheckNodeOnline(self, self.dst_node.name)
11616 _CheckNodeNotDrained(self, self.dst_node.name)
11619 self.dest_disk_info = None
11620 self.dest_x509_ca = None
11622 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11623 self.dst_node = None
11625 if len(self.op.target_node) != len(self.instance.disks):
11626 raise errors.OpPrereqError(("Received destination information for %s"
11627 " disks, but instance %s has %s disks") %
11628 (len(self.op.target_node), instance_name,
11629 len(self.instance.disks)),
11630 errors.ECODE_INVAL)
11632 cds = _GetClusterDomainSecret()
11634 # Check X509 key name
11636 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11637 except (TypeError, ValueError), err:
11638 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11640 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11641 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11642 errors.ECODE_INVAL)
11644 # Load and verify CA
11646 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11647 except OpenSSL.crypto.Error, err:
11648 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11649 (err, ), errors.ECODE_INVAL)
11651 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11652 if errcode is not None:
11653 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11654 (msg, ), errors.ECODE_INVAL)
11656 self.dest_x509_ca = cert
11658 # Verify target information
11660 for idx, disk_data in enumerate(self.op.target_node):
11662 (host, port, magic) = \
11663 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11664 except errors.GenericError, err:
11665 raise errors.OpPrereqError("Target info for disk %s: %s" %
11666 (idx, err), errors.ECODE_INVAL)
11668 disk_info.append((host, port, magic))
11670 assert len(disk_info) == len(self.op.target_node)
11671 self.dest_disk_info = disk_info
11674 raise errors.ProgrammerError("Unhandled export mode %r" %
11677 # instance disk type verification
11678 # TODO: Implement export support for file-based disks
11679 for disk in self.instance.disks:
11680 if disk.dev_type == constants.LD_FILE:
11681 raise errors.OpPrereqError("Export not supported for instances with"
11682 " file-based disks", errors.ECODE_INVAL)
11684 def _CleanupExports(self, feedback_fn):
11685 """Removes exports of current instance from all other nodes.
11687 If an instance in a cluster with nodes A..D was exported to node C, its
11688 exports will be removed from the nodes A, B and D.
11691 assert self.op.mode != constants.EXPORT_MODE_REMOTE
11693 nodelist = self.cfg.GetNodeList()
11694 nodelist.remove(self.dst_node.name)
11696 # on one-node clusters nodelist will be empty after the removal;
11697 # if we proceeded, the backup would be removed because OpBackupQuery
11698 # substitutes an empty list with the full cluster node list.
11699 iname = self.instance.name
11700 if nodelist:
11701 feedback_fn("Removing old exports for instance %s" % iname)
11702 exportlist = self.rpc.call_export_list(nodelist)
11703 for node in exportlist:
11704 if exportlist[node].fail_msg:
11705 continue
11706 if iname in exportlist[node].payload:
11707 msg = self.rpc.call_export_remove(node, iname).fail_msg
11708 if msg:
11709 self.LogWarning("Could not remove older export for instance %s"
11710 " on node %s: %s", iname, node, msg)
11712 def Exec(self, feedback_fn):
11713 """Export an instance to an image in the cluster.
11716 assert self.op.mode in constants.EXPORT_MODES
11718 instance = self.instance
11719 src_node = instance.primary_node
11721 if self.op.shutdown:
11722 # shutdown the instance, but not the disks
11723 feedback_fn("Shutting down instance %s" % instance.name)
11724 result = self.rpc.call_instance_shutdown(src_node, instance,
11725 self.op.shutdown_timeout)
11726 # TODO: Maybe ignore failures if ignore_remove_failures is set
11727 result.Raise("Could not shutdown instance %s on"
11728 " node %s" % (instance.name, src_node))
11730 # set the disk IDs correctly since call_instance_start needs the
11731 # correct drbd minor to create the symlinks
11732 for disk in instance.disks:
11733 self.cfg.SetDiskID(disk, src_node)
11735 activate_disks = (not instance.admin_up)
11738 # Activate the instance disks if we're exporting a stopped instance
11739 feedback_fn("Activating disks for %s" % instance.name)
11740 _StartInstanceDisks(self, instance, None)
11743 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11746 helper.CreateSnapshots()
11748 if (self.op.shutdown and instance.admin_up and
11749 not self.op.remove_instance):
11750 assert not activate_disks
11751 feedback_fn("Starting instance %s" % instance.name)
11752 result = self.rpc.call_instance_start(src_node, instance,
11754 msg = result.fail_msg
11755 if msg:
11756 feedback_fn("Failed to start instance: %s" % msg)
11757 _ShutdownInstanceDisks(self, instance)
11758 raise errors.OpExecError("Could not start instance: %s" % msg)
11760 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11761 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11762 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11763 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11764 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11766 (key_name, _, _) = self.x509_key_name
11769 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11772 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11773 key_name, dest_ca_pem,
11778 # Check for backwards compatibility
11779 assert len(dresults) == len(instance.disks)
11780 assert compat.all(isinstance(i, bool) for i in dresults), \
11781 "Not all results are boolean: %r" % dresults
11785 feedback_fn("Deactivating disks for %s" % instance.name)
11786 _ShutdownInstanceDisks(self, instance)
11788 if not (compat.all(dresults) and fin_resu):
11791 failures.append("export finalization")
11792 if not compat.all(dresults):
11793 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11795 failures.append("disk export: disk(s) %s" % fdsk)
11797 raise errors.OpExecError("Export failed, errors in %s" %
11798 utils.CommaJoin(failures))
11800 # At this point, the export was successful, we can clean up/finish
11802 # Remove instance if requested
11803 if self.op.remove_instance:
11804 feedback_fn("Removing instance %s" % instance.name)
11805 _RemoveInstance(self, feedback_fn, instance,
11806 self.op.ignore_remove_failures)
11808 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11809 self._CleanupExports(feedback_fn)
11811 return fin_resu, dresults
11814 class LUBackupRemove(NoHooksLU):
11815 """Remove exports related to the named instance.
11820 def ExpandNames(self):
11821 self.needed_locks = {}
11822 # We need all nodes to be locked in order for RemoveExport to work, but we
11823 # don't need to lock the instance itself, as nothing will happen to it (and
11824 # we can remove exports also for a removed instance)
11825 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11827 def Exec(self, feedback_fn):
11828 """Remove any export.
11831 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11832 # If the instance was not found we'll try with the name that was passed in.
11833 # This will only work if it was an FQDN, though.
11835 if not instance_name:
11837 instance_name = self.op.instance_name
11839 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11840 exportlist = self.rpc.call_export_list(locked_nodes)
11842 for node in exportlist:
11843 msg = exportlist[node].fail_msg
11844 if msg:
11845 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11846 continue
11847 if instance_name in exportlist[node].payload:
11849 result = self.rpc.call_export_remove(node, instance_name)
11850 msg = result.fail_msg
11852 logging.error("Could not remove export for instance %s"
11853 " on node %s: %s", instance_name, node, msg)
11855 if fqdn_warn and not found:
11856 feedback_fn("Export not found. If trying to remove an export belonging"
11857 " to a deleted instance please use its Fully Qualified"
11858 " Domain Name.")
11861 class LUGroupAdd(LogicalUnit):
11862 """Logical unit for creating node groups.
11865 HPATH = "group-add"
11866 HTYPE = constants.HTYPE_GROUP
11869 def ExpandNames(self):
11870 # We need the new group's UUID here so that we can create and acquire the
11871 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11872 # that it should not check whether the UUID exists in the configuration.
11873 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11874 self.needed_locks = {}
11875 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11877 def CheckPrereq(self):
11878 """Check prerequisites.
11880 This checks that the given group name is not an existing node group
11885 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11886 except errors.OpPrereqError:
11889 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11890 " node group (UUID: %s)" %
11891 (self.op.group_name, existing_uuid),
11892 errors.ECODE_EXISTS)
11894 if self.op.ndparams:
11895 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11897 def BuildHooksEnv(self):
11898 """Build hooks env.
11902 "GROUP_NAME": self.op.group_name,
11905 def BuildHooksNodes(self):
11906 """Build hooks nodes.
11909 mn = self.cfg.GetMasterNode()
11910 return ([mn], [mn])
11912 def Exec(self, feedback_fn):
11913 """Add the node group to the cluster.
11916 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11917 uuid=self.group_uuid,
11918 alloc_policy=self.op.alloc_policy,
11919 ndparams=self.op.ndparams)
11921 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11922 del self.remove_locks[locking.LEVEL_NODEGROUP]
11925 class LUGroupAssignNodes(NoHooksLU):
11926 """Logical unit for assigning nodes to groups.
11931 def ExpandNames(self):
11932 # These raise errors.OpPrereqError on their own:
11933 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11934 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11936 # We want to lock all the affected nodes and groups. We have readily
11937 # available the list of nodes, and the *destination* group. To gather the
11938 # list of "source" groups, we need to fetch node information later on.
11939 self.needed_locks = {
11940 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11941 locking.LEVEL_NODE: self.op.nodes,
11944 def DeclareLocks(self, level):
11945 if level == locking.LEVEL_NODEGROUP:
11946 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11948 # Try to get all affected nodes' groups without having the group or node
11949 # lock yet. Needs verification later in the code flow.
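# (CheckPrereq recomputes the affected groups and aborts the operation if
# nodes changed groups in the meantime)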
11950 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11952 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11954 def CheckPrereq(self):
11955 """Check prerequisites.
11958 assert self.needed_locks[locking.LEVEL_NODEGROUP]
11959 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
11960 frozenset(self.op.nodes))
11962 expected_locks = (set([self.group_uuid]) |
11963 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11964 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
11965 if actual_locks != expected_locks:
11966 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11967 " current groups are '%s', used to be '%s'" %
11968 (utils.CommaJoin(expected_locks),
11969 utils.CommaJoin(actual_locks)))
11971 self.node_data = self.cfg.GetAllNodesInfo()
11972 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11973 instance_data = self.cfg.GetAllInstancesInfo()
11975 if self.group is None:
11976 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11977 (self.op.group_name, self.group_uuid))
11979 (new_splits, previous_splits) = \
11980 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11981 for node in self.op.nodes],
11982 self.node_data, instance_data)
11985 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11987 if not self.op.force:
11988 raise errors.OpExecError("The following instances get split by this"
11989 " change and --force was not given: %s" %
11992 self.LogWarning("This operation will split the following instances: %s",
11995 if previous_splits:
11996 self.LogWarning("In addition, these already-split instances continue"
11997 " to be split across groups: %s",
11998 utils.CommaJoin(utils.NiceSort(previous_splits)))
12000 def Exec(self, feedback_fn):
12001 """Assign nodes to a new group.
12004 for node in self.op.nodes:
12005 self.node_data[node].group = self.group_uuid
12007 # FIXME: Depends on side-effects of modifying the result of
12008 # C{cfg.GetAllNodesInfo}
12010 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12013 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12014 """Check for split instances after a node assignment.
12016 This method considers a series of node assignments as an atomic operation,
12017 and returns information about split instances after applying the set of
12018 changes.
12020 In particular, it returns information about newly split instances, and
12021 instances that were already split, and remain so after the change.
12023 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12026 @type changes: list of (node_name, new_group_uuid) pairs.
12027 @param changes: list of node assignments to consider.
12028 @param node_data: a dict with data for all nodes
12029 @param instance_data: a dict with all instances to consider
12030 @rtype: a two-tuple
12031 @return: a list of instances that were previously okay and end up split as a
12032 consequence of this change, and a list of instances that were previously
12033 split and this change does not fix.
12036 changed_nodes = dict((node, group) for node, group in changes
12037 if node_data[node].group != group)
12039 all_split_instances = set()
12040 previously_split_instances = set()
12042 def InstanceNodes(instance):
12043 return [instance.primary_node] + list(instance.secondary_nodes)
12045 for inst in instance_data.values():
12046 if inst.disk_template not in constants.DTS_INT_MIRROR:
12049 instance_nodes = InstanceNodes(inst)
12051 if len(set(node_data[node].group for node in instance_nodes)) > 1:
12052 previously_split_instances.add(inst.name)
12054 if len(set(changed_nodes.get(node, node_data[node].group)
12055 for node in instance_nodes)) > 1:
12056 all_split_instances.add(inst.name)
12058 return (list(all_split_instances - previously_split_instances),
12059 list(previously_split_instances & all_split_instances))
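# Minimal illustration (hypothetical data): with nodes "n1" and "n2" both in
# group "g1" and a DRBD instance "inst1" using n1 as primary and n2 as
# secondary, the assignment [("n2", "g2")] makes the instance span two
# groups, so the method returns (["inst1"], []) -- newly split, nothing
# previously split.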
12062 class _GroupQuery(_QueryBase):
12063 FIELDS = query.GROUP_FIELDS
12065 def ExpandNames(self, lu):
12066 lu.needed_locks = {}
12068 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12069 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12072 self.wanted = [name_to_uuid[name]
12073 for name in utils.NiceSort(name_to_uuid.keys())]
12075 # Accept names to be either names or UUIDs.
12078 all_uuid = frozenset(self._all_groups.keys())
12080 for name in self.names:
12081 if name in all_uuid:
12082 self.wanted.append(name)
12083 elif name in name_to_uuid:
12084 self.wanted.append(name_to_uuid[name])
12086 missing.append(name)
12089 raise errors.OpPrereqError("Some groups do not exist: %s" %
12090 utils.CommaJoin(missing),
12091 errors.ECODE_NOENT)
12093 def DeclareLocks(self, lu, level):
12096 def _GetQueryData(self, lu):
12097 """Computes the list of node groups and their attributes.
12100 do_nodes = query.GQ_NODE in self.requested_data
12101 do_instances = query.GQ_INST in self.requested_data
12103 group_to_nodes = None
12104 group_to_instances = None
12106 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12107 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12108 # latter GetAllInstancesInfo() is not enough, for we have to go through
12109 # instance->node. Hence, we will need to process nodes even if we only need
12110 # instance information.
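# e.g. an instance whose primary node belongs to group G is listed under
# group_to_instances[G]; secondary nodes are not taken into account here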
12111 if do_nodes or do_instances:
12112 all_nodes = lu.cfg.GetAllNodesInfo()
12113 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12116 for node in all_nodes.values():
12117 if node.group in group_to_nodes:
12118 group_to_nodes[node.group].append(node.name)
12119 node_to_group[node.name] = node.group
12122 all_instances = lu.cfg.GetAllInstancesInfo()
12123 group_to_instances = dict((uuid, []) for uuid in self.wanted)
12125 for instance in all_instances.values():
12126 node = instance.primary_node
12127 if node in node_to_group:
12128 group_to_instances[node_to_group[node]].append(instance.name)
12131 # Do not pass on node information if it was not requested.
12132 group_to_nodes = None
12134 return query.GroupQueryData([self._all_groups[uuid]
12135 for uuid in self.wanted],
12136 group_to_nodes, group_to_instances)
12139 class LUGroupQuery(NoHooksLU):
12140 """Logical unit for querying node groups.
12145 def CheckArguments(self):
12146 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12147 self.op.output_fields, False)
12149 def ExpandNames(self):
12150 self.gq.ExpandNames(self)
12152 def DeclareLocks(self, level):
12153 self.gq.DeclareLocks(self, level)
12155 def Exec(self, feedback_fn):
12156 return self.gq.OldStyleQuery(self)
12159 class LUGroupSetParams(LogicalUnit):
12160 """Modifies the parameters of a node group.
12163 HPATH = "group-modify"
12164 HTYPE = constants.HTYPE_GROUP
12167 def CheckArguments(self):
12170 self.op.alloc_policy,
12173 if all_changes.count(None) == len(all_changes):
12174 raise errors.OpPrereqError("Please pass at least one modification",
12175 errors.ECODE_INVAL)
12177 def ExpandNames(self):
12178 # This raises errors.OpPrereqError on its own:
12179 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12181 self.needed_locks = {
12182 locking.LEVEL_NODEGROUP: [self.group_uuid],
12185 def CheckPrereq(self):
12186 """Check prerequisites.
12189 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12191 if self.group is None:
12192 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12193 (self.op.group_name, self.group_uuid))
12195 if self.op.ndparams:
12196 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12197 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12198 self.new_ndparams = new_ndparams
12200 def BuildHooksEnv(self):
12201 """Build hooks env.
12205 "GROUP_NAME": self.op.group_name,
12206 "NEW_ALLOC_POLICY": self.op.alloc_policy,
12209 def BuildHooksNodes(self):
12210 """Build hooks nodes.
12213 mn = self.cfg.GetMasterNode()
12214 return ([mn], [mn])
12216 def Exec(self, feedback_fn):
12217 """Modifies the node group.
12222 if self.op.ndparams:
12223 self.group.ndparams = self.new_ndparams
12224 result.append(("ndparams", str(self.group.ndparams)))
12226 if self.op.alloc_policy:
12227 self.group.alloc_policy = self.op.alloc_policy
12229 self.cfg.Update(self.group, feedback_fn)
12233 class LUGroupRemove(LogicalUnit):
12234 HPATH = "group-remove"
12235 HTYPE = constants.HTYPE_GROUP
12238 def ExpandNames(self):
12239 # This raises errors.OpPrereqError on its own:
12240 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12241 self.needed_locks = {
12242 locking.LEVEL_NODEGROUP: [self.group_uuid],
12245 def CheckPrereq(self):
12246 """Check prerequisites.
12248 This checks that the given group name exists as a node group, that it is
12249 empty (i.e., contains no nodes), and that it is not the last group of the
12250 cluster.
12253 # Verify that the group is empty.
12254 group_nodes = [node.name
12255 for node in self.cfg.GetAllNodesInfo().values()
12256 if node.group == self.group_uuid]
12259 raise errors.OpPrereqError("Group '%s' not empty, has the following"
12261 (self.op.group_name,
12262 utils.CommaJoin(utils.NiceSort(group_nodes))),
12263 errors.ECODE_STATE)
12265 # Verify the cluster would not be left group-less.
12266 if len(self.cfg.GetNodeGroupList()) == 1:
12267 raise errors.OpPrereqError("Group '%s' is the only group,"
12268 " cannot be removed" %
12269 self.op.group_name,
12270 errors.ECODE_STATE)
12272 def BuildHooksEnv(self):
12273 """Build hooks env.
12277 "GROUP_NAME": self.op.group_name,
12280 def BuildHooksNodes(self):
12281 """Build hooks nodes.
12284 mn = self.cfg.GetMasterNode()
12285 return ([mn], [mn])
12287 def Exec(self, feedback_fn):
12288 """Remove the node group.
12292 self.cfg.RemoveNodeGroup(self.group_uuid)
12293 except errors.ConfigurationError:
12294 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12295 (self.op.group_name, self.group_uuid))
12297 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12300 class LUGroupRename(LogicalUnit):
12301 HPATH = "group-rename"
12302 HTYPE = constants.HTYPE_GROUP
12305 def ExpandNames(self):
12306 # This raises errors.OpPrereqError on its own:
12307 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12309 self.needed_locks = {
12310 locking.LEVEL_NODEGROUP: [self.group_uuid],
12313 def CheckPrereq(self):
12314 """Check prerequisites.
12316 Ensures requested new name is not yet used.
12320 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12321 except errors.OpPrereqError:
12324 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12325 " node group (UUID: %s)" %
12326 (self.op.new_name, new_name_uuid),
12327 errors.ECODE_EXISTS)
12329 def BuildHooksEnv(self):
12330 """Build hooks env.
12334 "OLD_NAME": self.op.group_name,
12335 "NEW_NAME": self.op.new_name,
12338 def BuildHooksNodes(self):
12339 """Build hooks nodes.
12342 mn = self.cfg.GetMasterNode()
12344 all_nodes = self.cfg.GetAllNodesInfo()
12345 all_nodes.pop(mn, None)
12347 run_nodes = [mn]
12348 run_nodes.extend(node.name for node in all_nodes.values()
12349 if node.group == self.group_uuid)
12351 return (run_nodes, run_nodes)
12353 def Exec(self, feedback_fn):
12354 """Rename the node group.
12357 group = self.cfg.GetNodeGroup(self.group_uuid)
12359 if group is None:
12360 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12361 (self.op.group_name, self.group_uuid))
12363 group.name = self.op.new_name
12364 self.cfg.Update(group, feedback_fn)
12366 return self.op.new_name
12369 class LUGroupEvacuate(LogicalUnit):
12370 HPATH = "group-evacuate"
12371 HTYPE = constants.HTYPE_GROUP
12374 def ExpandNames(self):
12375 # This raises errors.OpPrereqError on its own:
12376 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12378 if self.op.target_groups:
12379 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12380 self.op.target_groups)
12382 self.req_target_uuids = []
12384 if self.group_uuid in self.req_target_uuids:
12385 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12386 " as a target group (targets are %s)" %
12388 utils.CommaJoin(self.req_target_uuids)),
12389 errors.ECODE_INVAL)
12391 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12393 self.share_locks = _ShareAll()
12394 self.needed_locks = {
12395 locking.LEVEL_INSTANCE: [],
12396 locking.LEVEL_NODEGROUP: [],
12397 locking.LEVEL_NODE: [],
12400 def DeclareLocks(self, level):
12401 if level == locking.LEVEL_INSTANCE:
12402 assert not self.needed_locks[locking.LEVEL_INSTANCE]
12404 # Lock instances optimistically, needs verification once node and group
12405 # locks have been acquired
12406 self.needed_locks[locking.LEVEL_INSTANCE] = \
12407 self.cfg.GetNodeGroupInstances(self.group_uuid)
12409 elif level == locking.LEVEL_NODEGROUP:
12410 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12412 if self.req_target_uuids:
12413 lock_groups = set([self.group_uuid] + self.req_target_uuids)
12415 # Lock all groups used by instances optimistically; this requires going
12416 # via the node before it's locked, requiring verification later on
12417 lock_groups.update(group_uuid
12418 for instance_name in
12419 self.owned_locks(locking.LEVEL_INSTANCE)
12421 self.cfg.GetInstanceNodeGroups(instance_name))
12423 # No target groups, need to lock all of them
12424 lock_groups = locking.ALL_SET
12426 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12428 elif level == locking.LEVEL_NODE:
12429 # This will only lock the nodes in the group to be evacuated which
12430 # contain actual instances
12431 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12432 self._LockInstancesNodes()
12434 # Lock all nodes in group to be evacuated and target groups
12435 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12436 assert self.group_uuid in owned_groups
12437 member_nodes = [node_name
12438 for group in owned_groups
12439 for node_name in self.cfg.GetNodeGroup(group).members]
12440 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12442 def CheckPrereq(self):
12443 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12444 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12445 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12447 assert owned_groups.issuperset(self.req_target_uuids)
12448 assert self.group_uuid in owned_groups
12450 # Check if locked instances are still correct
12451 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12453 # Get instance information
12454 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12456 # Check if node groups for locked instances are still correct
12457 for instance_name in owned_instances:
12458 inst = self.instances[instance_name]
12459 assert owned_nodes.issuperset(inst.all_nodes), \
12460 "Instance %s's nodes changed while we kept the lock" % instance_name
12462 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12465 assert self.group_uuid in inst_groups, \
12466 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12468 if self.req_target_uuids:
12469 # User requested specific target groups
12470 self.target_uuids = self.req_target_uuids
12472 # All groups except the one to be evacuated are potential targets
12473 self.target_uuids = [group_uuid for group_uuid in owned_groups
12474 if group_uuid != self.group_uuid]
12476 if not self.target_uuids:
12477 raise errors.OpPrereqError("There are no possible target groups",
12478 errors.ECODE_INVAL)
12480 def BuildHooksEnv(self):
12481 """Build hooks env.
12485 "GROUP_NAME": self.op.group_name,
12486 "TARGET_GROUPS": " ".join(self.target_uuids),
12489 def BuildHooksNodes(self):
12490 """Build hooks nodes.
12493 mn = self.cfg.GetMasterNode()
12495 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12497 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12499 return (run_nodes, run_nodes)
12501 def Exec(self, feedback_fn):
12502 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12504 assert self.group_uuid not in self.target_uuids
12506 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12507 instances=instances, target_groups=self.target_uuids)
12509 ial.Run(self.op.iallocator)
12511 if not ial.success:
12512 raise errors.OpPrereqError("Can't compute group evacuation using"
12513 " iallocator '%s': %s" %
12514 (self.op.iallocator, ial.info),
12515 errors.ECODE_NORES)
12517 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12519 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12520 len(jobs), self.op.group_name)
12522 return ResultWithJobs(jobs)
12525 class TagsLU(NoHooksLU): # pylint: disable=W0223
12526 """Generic tags LU.
12528 This is an abstract class which is the parent of all the other tags LUs.
12531 def ExpandNames(self):
12532 self.group_uuid = None
12533 self.needed_locks = {}
12534 if self.op.kind == constants.TAG_NODE:
12535 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12536 self.needed_locks[locking.LEVEL_NODE] = self.op.name
12537 elif self.op.kind == constants.TAG_INSTANCE:
12538 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12539 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12540 elif self.op.kind == constants.TAG_NODEGROUP:
12541 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12543 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12544 # not possible to acquire the BGL based on opcode parameters)
12546 def CheckPrereq(self):
12547 """Check prerequisites.
12550 if self.op.kind == constants.TAG_CLUSTER:
12551 self.target = self.cfg.GetClusterInfo()
12552 elif self.op.kind == constants.TAG_NODE:
12553 self.target = self.cfg.GetNodeInfo(self.op.name)
12554 elif self.op.kind == constants.TAG_INSTANCE:
12555 self.target = self.cfg.GetInstanceInfo(self.op.name)
12556 elif self.op.kind == constants.TAG_NODEGROUP:
12557 self.target = self.cfg.GetNodeGroup(self.group_uuid)
12559 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12560 str(self.op.kind), errors.ECODE_INVAL)
12563 class LUTagsGet(TagsLU):
12564 """Returns the tags of a given object.
12569 def ExpandNames(self):
12570 TagsLU.ExpandNames(self)
12572 # Share locks as this is only a read operation
12573 self.share_locks = _ShareAll()
12575 def Exec(self, feedback_fn):
12576 """Returns the tag list.
12579 return list(self.target.GetTags())
12582 class LUTagsSearch(NoHooksLU):
12583 """Searches the tags for a given pattern.
12588 def ExpandNames(self):
12589 self.needed_locks = {}
12591 def CheckPrereq(self):
12592 """Check prerequisites.
12594 This checks the pattern passed for validity by compiling it.
12598 self.re = re.compile(self.op.pattern)
12599 except re.error, err:
12600 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12601 (self.op.pattern, err), errors.ECODE_INVAL)
12603 def Exec(self, feedback_fn):
12604 """Returns the tag list.
12607 cfg = self.cfg
12608 tgts = [("/cluster", cfg.GetClusterInfo())]
12609 ilist = cfg.GetAllInstancesInfo().values()
12610 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12611 nlist = cfg.GetAllNodesInfo().values()
12612 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12613 tgts.extend(("/nodegroup/%s" % n.name, n)
12614 for n in cfg.GetAllNodeGroupsInfo().values())
12615 results = []
12616 for path, target in tgts:
12617 for tag in target.GetTags():
12618 if self.re.search(tag):
12619 results.append((path, tag))
12621 return results
12623 class LUTagsSet(TagsLU):
12624 """Sets a tag on a given object.
12629 def CheckPrereq(self):
12630 """Check prerequisites.
12632 This checks the type and length of the tag name and value.
12635 TagsLU.CheckPrereq(self)
12636 for tag in self.op.tags:
12637 objects.TaggableObject.ValidateTag(tag)
12639 def Exec(self, feedback_fn):
12643 try:
12644 for tag in self.op.tags:
12645 self.target.AddTag(tag)
12646 except errors.TagError, err:
12647 raise errors.OpExecError("Error while setting tag: %s" % str(err))
12648 self.cfg.Update(self.target, feedback_fn)
12651 class LUTagsDel(TagsLU):
12652 """Delete a list of tags from a given object.
12657 def CheckPrereq(self):
12658 """Check prerequisites.
12660 This checks that we have the given tag.
12663 TagsLU.CheckPrereq(self)
12664 for tag in self.op.tags:
12665 objects.TaggableObject.ValidateTag(tag)
12666 del_tags = frozenset(self.op.tags)
12667 cur_tags = self.target.GetTags()
12669 diff_tags = del_tags - cur_tags
12670 if diff_tags:
12671 diff_names = ("'%s'" % i for i in sorted(diff_tags))
12672 raise errors.OpPrereqError("Tag(s) %s not found" %
12673 (utils.CommaJoin(diff_names), ),
12674 errors.ECODE_NOENT)
12676 def Exec(self, feedback_fn):
12677 """Remove the tag from the object.
12680 for tag in self.op.tags:
12681 self.target.RemoveTag(tag)
12682 self.cfg.Update(self.target, feedback_fn)
12685 class LUTestDelay(NoHooksLU):
12686 """Sleep for a specified amount of time.
12688 This LU sleeps on the master and/or nodes for a specified amount of
12689 time.
12694 def ExpandNames(self):
12695 """Expand names and set required locks.
12697 This expands the node list, if any.
12700 self.needed_locks = {}
12701 if self.op.on_nodes:
12702 # _GetWantedNodes can be used here, but is not always appropriate to use
12703 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12704 # more information.
12705 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12706 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12708 def _TestDelay(self):
12709 """Do the actual sleep.
12712 if self.op.on_master:
12713 if not utils.TestDelay(self.op.duration):
12714 raise errors.OpExecError("Error during master delay test")
12715 if self.op.on_nodes:
12716 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12717 for node, node_result in result.items():
12718 node_result.Raise("Failure during rpc call to node %s" % node)
12720 def Exec(self, feedback_fn):
12721 """Execute the test delay opcode, with the wanted repetitions.
12724 if self.op.repeat == 0:
12727 top_value = self.op.repeat - 1
12728 for i in range(self.op.repeat):
12729 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12733 class LUTestJqueue(NoHooksLU):
12734 """Utility LU to test some aspects of the job queue.
12739 # Must be lower than default timeout for WaitForJobChange to see whether it
12740 # notices changed jobs
12741 _CLIENT_CONNECT_TIMEOUT = 20.0
12742 _CLIENT_CONFIRM_TIMEOUT = 60.0
12745 def _NotifyUsingSocket(cls, cb, errcls):
12746 """Opens a Unix socket and waits for another program to connect.
12749 @param cb: Callback to send socket name to client
12750 @type errcls: class
12751 @param errcls: Exception class to use for errors
12754 # Using a temporary directory as there's no easy way to create temporary
12755 # sockets without writing a custom loop around tempfile.mktemp and
12756 # socket.bind
12757 tmpdir = tempfile.mkdtemp()
12759 tmpsock = utils.PathJoin(tmpdir, "sock")
12761 logging.debug("Creating temporary socket at %s", tmpsock)
12762 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12767 # Send details to client
12770 # Wait for client to connect before continuing
12771 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12772 try:
12773 (conn, _) = sock.accept()
12774 except socket.error, err:
12775 raise errcls("Client didn't connect in time (%s)" % err)
12779 # Remove as soon as client is connected
12780 shutil.rmtree(tmpdir)
12782 # Wait for client to close
12785 # pylint: disable=E1101
12786 # Instance of '_socketobject' has no ... member
12787 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12789 except socket.error, err:
12790 raise errcls("Client failed to confirm notification (%s)" % err)
12794 def _SendNotification(self, test, arg, sockname):
12795 """Sends a notification to the client.
12798 @param test: Test name
12799 @param arg: Test argument (depends on test)
12800 @type sockname: string
12801 @param sockname: Socket path
12804 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12806 def _Notify(self, prereq, test, arg):
12807 """Notifies the client of a test.
12810 @param prereq: Whether this is a prereq-phase test
12812 @param test: Test name
12813 @param arg: Test argument (depends on test)
12817 errcls = errors.OpPrereqError
12819 errcls = errors.OpExecError
12821 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12825 def CheckArguments(self):
12826 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12827 self.expandnames_calls = 0
12829 def ExpandNames(self):
12830 checkargs_calls = getattr(self, "checkargs_calls", 0)
12831 if checkargs_calls < 1:
12832 raise errors.ProgrammerError("CheckArguments was not called")
12834 self.expandnames_calls += 1
12836 if self.op.notify_waitlock:
12837 self._Notify(True, constants.JQT_EXPANDNAMES, None)
12839 self.LogInfo("Expanding names")
12841 # Get lock on master node (just to get a lock, not for a particular reason)
12842 self.needed_locks = {
12843 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12846 def Exec(self, feedback_fn):
12847 if self.expandnames_calls < 1:
12848 raise errors.ProgrammerError("ExpandNames was not called")
12850 if self.op.notify_exec:
12851 self._Notify(False, constants.JQT_EXEC, None)
12853 self.LogInfo("Executing")
12855 if self.op.log_messages:
12856 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12857 for idx, msg in enumerate(self.op.log_messages):
12858 self.LogInfo("Sending log message %s", idx + 1)
12859 feedback_fn(constants.JQT_MSGPREFIX + msg)
12860 # Report how many test messages have been sent
12861 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12864 raise errors.OpExecError("Opcode failure was requested")
12869 class IAllocator(object):
12870 """IAllocator framework.
12872 An IAllocator instance has four sets of attributes:
12873 - cfg that is needed to query the cluster
12874 - input data (all members of the _KEYS class attribute are required)
12875 - four buffer attributes (in|out_data|text), that represent the
12876 input (to the external script) in text and data structure format,
12877 and the output from it, again in two formats
12878 - the result variables from the script (success, info, nodes) for
12882 # pylint: disable=R0902
12883 # lots of instance attributes
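# Typical usage within this module (sketch, mirroring LUGroupEvacuate.Exec):
#   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
#                    instances=instances, target_groups=self.target_uuids)
#   ial.Run(self.op.iallocator)
#   if not ial.success:
#     raise errors.OpPrereqError(...)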
12885 def __init__(self, cfg, rpc, mode, **kwargs):
12888 # init buffer variables
12889 self.in_text = self.out_text = self.in_data = self.out_data = None
12890 # init all input fields so that pylint is happy
12892 self.memory = self.disks = self.disk_template = None
12893 self.os = self.tags = self.nics = self.vcpus = None
12894 self.hypervisor = None
12895 self.relocate_from = None
12897 self.instances = None
12898 self.evac_mode = None
12899 self.target_groups = []
12901 self.required_nodes = None
12902 # init result fields
12903 self.success = self.info = self.result = None
12905 try:
12906 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12907 except KeyError:
12908 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12909 " IAllocator" % self.mode)
12911 keyset = [n for (n, _) in keydata]
12914 if key not in keyset:
12915 raise errors.ProgrammerError("Invalid input parameter '%s' to"
12916 " IAllocator" % key)
12917 setattr(self, key, kwargs[key])
12920 if key not in kwargs:
12921 raise errors.ProgrammerError("Missing input parameter '%s' to"
12922 " IAllocator" % key)
12923 self._BuildInputData(compat.partial(fn, self), keydata)
12925 def _ComputeClusterData(self):
12926 """Compute the generic allocator input data.
12928 This is the data that is independent of the actual operation.
12932 cluster_info = cfg.GetClusterInfo()
12935 "version": constants.IALLOCATOR_VERSION,
12936 "cluster_name": cfg.GetClusterName(),
12937 "cluster_tags": list(cluster_info.GetTags()),
12938 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12939 # we don't have job IDs
12941 ninfo = cfg.GetAllNodesInfo()
12942 iinfo = cfg.GetAllInstancesInfo().values()
12943 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12946 node_list = [n.name for n in ninfo.values() if n.vm_capable]
12948 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12949 hypervisor_name = self.hypervisor
12950 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12951 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12953 hypervisor_name = cluster_info.enabled_hypervisors[0]
12955 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12958 self.rpc.call_all_instances_info(node_list,
12959 cluster_info.enabled_hypervisors)
12961 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12963 config_ndata = self._ComputeBasicNodeData(ninfo)
12964 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12965 i_list, config_ndata)
12966 assert len(data["nodes"]) == len(ninfo), \
12967 "Incomplete node data computed"
12969 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12971 self.in_data = data
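# Editor's note: for orientation only, self.in_data as assembled above has
# roughly this top-level shape (values illustrative; the "request" key is
# added later by _BuildInputData):
#
#   {"version": constants.IALLOCATOR_VERSION,
#    "cluster_name": "cluster.example.com",
#    "cluster_tags": [],
#    "enabled_hypervisors": ["xen-pvm"],
#    "nodegroups": {...},   # from _ComputeNodeGroupData
#    "nodes": {...},        # from _ComputeBasicNodeData/_ComputeDynamicNodeData
#    "instances": {...}}    # from _ComputeInstanceData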
12974 def _ComputeNodeGroupData(cfg):
12975 """Compute node groups data.
12978 ng = dict((guuid, {
12979 "name": gdata.name,
12980 "alloc_policy": gdata.alloc_policy,
12981 })
12982 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12984 return ng
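# Editor's note: the resulting mapping is keyed by group UUID, e.g.
# (illustrative): {"dcf27b29-...": {"name": "default",
#                                   "alloc_policy": "preferred"}}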
12987 def _ComputeBasicNodeData(node_cfg):
12988 """Compute static node data from the cluster configuration.
12991 @returns: a dict mapping each node name to its config-derived node dict
12994 # fill in static (config-based) values
12995 node_results = dict((ninfo.name, {
12996 "tags": list(ninfo.GetTags()),
12997 "primary_ip": ninfo.primary_ip,
12998 "secondary_ip": ninfo.secondary_ip,
12999 "offline": ninfo.offline,
13000 "drained": ninfo.drained,
13001 "master_candidate": ninfo.master_candidate,
13002 "group": ninfo.group,
13003 "master_capable": ninfo.master_capable,
13004 "vm_capable": ninfo.vm_capable,
13005 })
13006 for ninfo in node_cfg.values())
13008 return node_results
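# Editor's note: each returned entry holds only configuration-derived fields,
# e.g. (illustrative):
#   "node1.example.com": {"tags": [], "primary_ip": "192.0.2.1",
#                         "secondary_ip": "198.51.100.1", "offline": False,
#                         "drained": False, "master_candidate": True,
#                         "group": "<group UUID>", "master_capable": True,
#                         "vm_capable": True}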
13011 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13012 node_results):
13013 """Compute dynamic node data, extending the config-derived node dicts.
13015 @param node_results: the basic node structures as filled from the config
13018 # make a copy of the current dict
13019 node_results = dict(node_results)
13020 for nname, nresult in node_data.items():
13021 assert nname in node_results, "Missing basic data for node %s" % nname
13022 ninfo = node_cfg[nname]
13024 if not (ninfo.offline or ninfo.drained):
13025 nresult.Raise("Can't get data for node %s" % nname)
13026 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13027 nname)
13028 remote_info = nresult.payload
13030 for attr in ["memory_total", "memory_free", "memory_dom0",
13031 "vg_size", "vg_free", "cpu_total"]:
13032 if attr not in remote_info:
13033 raise errors.OpExecError("Node '%s' didn't return attribute"
13034 " '%s'" % (nname, attr))
13035 if not isinstance(remote_info[attr], int):
13036 raise errors.OpExecError("Node '%s' returned invalid value"
13037 " for '%s': %s" %
13038 (nname, attr, remote_info[attr]))
13039 # compute memory used by primary instances
13040 i_p_mem = i_p_up_mem = 0
13041 for iinfo, beinfo in i_list:
13042 if iinfo.primary_node == nname:
13043 i_p_mem += beinfo[constants.BE_MEMORY]
13044 if iinfo.name not in node_iinfo[nname].payload:
13045 i_used_mem = 0
13046 else:
13047 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13048 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13049 remote_info["memory_free"] -= max(0, i_mem_diff)
13051 if iinfo.admin_up:
13052 i_p_up_mem += beinfo[constants.BE_MEMORY]
13054 # compute memory used by instances
13055 pnr_dyn = {
13056 "total_memory": remote_info["memory_total"],
13057 "reserved_memory": remote_info["memory_dom0"],
13058 "free_memory": remote_info["memory_free"],
13059 "total_disk": remote_info["vg_size"],
13060 "free_disk": remote_info["vg_free"],
13061 "total_cpus": remote_info["cpu_total"],
13062 "i_pri_memory": i_p_mem,
13063 "i_pri_up_memory": i_p_up_mem,
13064 }
13065 pnr_dyn.update(node_results[nname])
13066 node_results[nname] = pnr_dyn
13068 return node_results
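# Editor's note: for online, vm_capable nodes the entries above are extended
# with the runtime fields computed here, roughly (values illustrative):
#   {"total_memory": 16384, "reserved_memory": 1024, "free_memory": 8192,
#    "total_disk": 512000, "free_disk": 256000, "total_cpus": 8,
#    "i_pri_memory": 4096, "i_pri_up_memory": 2048, ...}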
13071 def _ComputeInstanceData(cluster_info, i_list):
13072 """Compute global instance data.
13075 instance_data = {}
13076 for iinfo, beinfo in i_list:
13077 nic_data = []
13078 for nic in iinfo.nics:
13079 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13080 nic_dict = {
13081 "mac": nic.mac,
13082 "ip": nic.ip,
13083 "mode": filled_params[constants.NIC_MODE],
13084 "link": filled_params[constants.NIC_LINK],
13085 }
13086 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13087 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13088 nic_data.append(nic_dict)
13089 pir = {
13090 "tags": list(iinfo.GetTags()),
13091 "admin_up": iinfo.admin_up,
13092 "vcpus": beinfo[constants.BE_VCPUS],
13093 "memory": beinfo[constants.BE_MEMORY],
13094 "os": iinfo.os,
13095 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13096 "nics": nic_data,
13097 "disks": [{constants.IDISK_SIZE: dsk.size,
13098 constants.IDISK_MODE: dsk.mode}
13099 for dsk in iinfo.disks],
13100 "disk_template": iinfo.disk_template,
13101 "hypervisor": iinfo.hypervisor,
13102 }
13103 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13104 pir["disks"])
13105 instance_data[iinfo.name] = pir
13107 return instance_data
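# Editor's note: one per-instance entry produced above looks roughly like this
# (values illustrative; disk_space_total includes template-dependent overhead):
#   "inst1.example.com": {"tags": [], "admin_up": True, "vcpus": 2,
#                         "memory": 1024, "nics": [...],
#                         "nodes": ["node1.example.com", "node2.example.com"],
#                         "disks": [{"size": 10240, "mode": "rw"}],
#                         "disk_template": "drbd", "hypervisor": "xen-pvm",
#                         "disk_space_total": 10368}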
13109 def _AddNewInstance(self):
13110 """Add new instance data to allocator structure.
13112 This in combination with _ComputeClusterData will create the
13113 correct structure needed as input for the allocator.
13115 The checks for the completeness of the opcode must have already been
13116 done.
13119 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13121 if self.disk_template in constants.DTS_INT_MIRROR:
13122 self.required_nodes = 2
13123 else:
13124 self.required_nodes = 1
13126 request = {
13127 "name": self.name,
13128 "disk_template": self.disk_template,
13129 "tags": self.tags,
13130 "os": self.os,
13131 "vcpus": self.vcpus,
13132 "memory": self.memory,
13133 "disks": self.disks,
13134 "disk_space_total": disk_space,
13135 "nics": self.nics,
13136 "required_nodes": self.required_nodes,
13137 "hypervisor": self.hypervisor,
13142 def _AddRelocateInstance(self):
13143 """Add relocate instance data to allocator structure.
13145 This in combination with _ComputeClusterData will create the
13146 correct structure needed as input for the allocator.
13148 The checks for the completeness of the opcode must have already been
13149 done.
13152 instance = self.cfg.GetInstanceInfo(self.name)
13153 if instance is None:
13154 raise errors.ProgrammerError("Unknown instance '%s' passed to"
13155 " IAllocator" % self.name)
13157 if instance.disk_template not in constants.DTS_MIRRORED:
13158 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13159 errors.ECODE_INVAL)
13161 if instance.disk_template in constants.DTS_INT_MIRROR and \
13162 len(instance.secondary_nodes) != 1:
13163 raise errors.OpPrereqError("Instance does not have exactly one secondary node",
13164 errors.ECODE_STATE)
13166 self.required_nodes = 1
13167 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13168 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13170 request = {
13171 "name": self.name,
13172 "disk_space_total": disk_space,
13173 "required_nodes": self.required_nodes,
13174 "relocate_from": self.relocate_from,
13178 def _AddNodeEvacuate(self):
13179 """Get data for node-evacuate requests.
13182 return {
13183 "instances": self.instances,
13184 "evac_mode": self.evac_mode,
13187 def _AddChangeGroup(self):
13188 """Get data for change-group requests.
13191 return {
13192 "instances": self.instances,
13193 "target_groups": self.target_groups,
13196 def _BuildInputData(self, fn, keydata):
13197 """Build input data structures.
13200 self._ComputeClusterData()
13202 request = fn()
13203 request["type"] = self.mode
13204 for keyname, keytype in keydata:
13205 if keyname not in request:
13206 raise errors.ProgrammerError("Request parameter %s is missing" %
13207 keyname)
13208 val = request[keyname]
13209 if not keytype(val):
13210 raise errors.ProgrammerError("Request parameter %s doesn't pass"
13211 " validation, value %s, expected"
13212 " type %s" % (keyname, val, keytype))
13213 self.in_data["request"] = request
13215 self.in_text = serializer.Dump(self.in_data)
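# Editor's note: after _BuildInputData, self.in_text contains the serialized
# JSON document passed to the external script; for a relocation it would look
# roughly like this (heavily abbreviated, values illustrative):
#   {"version": 2, "cluster_name": "...", "nodegroups": {...}, "nodes": {...},
#    "instances": {...},
#    "request": {"type": "relocate", "name": "inst1.example.com",
#                "required_nodes": 1, "disk_space_total": 10368,
#                "relocate_from": ["node2.example.com"]}}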
13217 _STRING_LIST = ht.TListOf(ht.TString)
13218 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13219 # pylint: disable=E1101
13220 # Class '...' has no 'OP_ID' member
13221 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13222 opcodes.OpInstanceMigrate.OP_ID,
13223 opcodes.OpInstanceReplaceDisks.OP_ID])
13224 })))
13226 _NEVAC_MOVED = \
13227 ht.TListOf(ht.TAnd(ht.TIsLength(3),
13228 ht.TItems([ht.TNonEmptyString,
13229 ht.TNonEmptyString,
13230 ht.TListOf(ht.TNonEmptyString),
13231 ])))
13232 _NEVAC_FAILED = \
13233 ht.TListOf(ht.TAnd(ht.TIsLength(2),
13234 ht.TItems([ht.TNonEmptyString,
13235 ht.TMaybeString,
13236 ])))
13237 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13238 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
13240 _MODE_DATA = {
13241 constants.IALLOCATOR_MODE_ALLOC:
13242 (_AddNewInstance,
13243 [
13244 ("name", ht.TString),
13245 ("memory", ht.TInt),
13246 ("disks", ht.TListOf(ht.TDict)),
13247 ("disk_template", ht.TString),
13248 ("os", ht.TString),
13249 ("tags", _STRING_LIST),
13250 ("nics", ht.TListOf(ht.TDict)),
13251 ("vcpus", ht.TInt),
13252 ("hypervisor", ht.TString),
13254 constants.IALLOCATOR_MODE_RELOC:
13255 (_AddRelocateInstance,
13256 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13258 constants.IALLOCATOR_MODE_NODE_EVAC:
13259 (_AddNodeEvacuate, [
13260 ("instances", _STRING_LIST),
13261 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13263 constants.IALLOCATOR_MODE_CHG_GROUP:
13264 (_AddChangeGroup, [
13265 ("instances", _STRING_LIST),
13266 ("target_groups", _STRING_LIST),
13270 def Run(self, name, validate=True, call_fn=None):
13271 """Run an instance allocator and return the results.
13274 if call_fn is None:
13275 call_fn = self.rpc.call_iallocator_runner
13277 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13278 result.Raise("Failure while running the iallocator script")
13280 self.out_text = result.payload
13282 self._ValidateResult()
13284 def _ValidateResult(self):
13285 """Process the allocator results.
13287 This will process and, if successful, save the result in
13288 self.out_data and the other result attributes.
13291 try:
13292 rdict = serializer.Load(self.out_text)
13293 except Exception, err:
13294 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13296 if not isinstance(rdict, dict):
13297 raise errors.OpExecError("Can't parse iallocator results: not a dict")
13299 # TODO: remove backwards compatibility in later versions
13300 if "nodes" in rdict and "result" not in rdict:
13301 rdict["result"] = rdict["nodes"]
13302 del rdict["nodes"]
13304 for key in "success", "info", "result":
13305 if key not in rdict:
13306 raise errors.OpExecError("Can't parse iallocator results:"
13307 " missing key '%s'" % key)
13308 setattr(self, key, rdict[key])
13310 if not self._result_check(self.result):
13311 raise errors.OpExecError("Iallocator returned invalid result,"
13312 " expected %s, got %s" %
13313 (self._result_check, self.result),
13314 errors.ECODE_INVAL)
13316 if self.mode == constants.IALLOCATOR_MODE_RELOC:
13317 assert self.relocate_from is not None
13318 assert self.required_nodes == 1
13320 node2group = dict((name, ndata["group"])
13321 for (name, ndata) in self.in_data["nodes"].items())
13323 fn = compat.partial(self._NodesToGroups, node2group,
13324 self.in_data["nodegroups"])
13326 instance = self.cfg.GetInstanceInfo(self.name)
13327 request_groups = fn(self.relocate_from + [instance.primary_node])
13328 result_groups = fn(rdict["result"] + [instance.primary_node])
13330 if self.success and not set(result_groups).issubset(request_groups):
13331 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13332 " differ from original groups (%s)" %
13333 (utils.CommaJoin(result_groups),
13334 utils.CommaJoin(request_groups)))
13336 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13337 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13339 self.out_data = rdict
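# Editor's note: a minimal reply that passes the checks above for an
# allocation request (illustrative):
#   {"success": true, "info": "allocation successful",
#    "result": ["node1.example.com", "node2.example.com"]}
# With "success": false, callers typically report "info" as the error text.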
13342 def _NodesToGroups(node2group, groups, nodes):
13343 """Returns a list of unique group names for a list of nodes.
13345 @type node2group: dict
13346 @param node2group: Map from node name to group UUID
13348 @param groups: Group information
13350 @param nodes: Node names
13353 result = set()
13355 for node in nodes:
13356 try:
13357 group_uuid = node2group[node]
13358 except KeyError:
13359 # Ignore unknown node
13360 pass
13361 else:
13362 try:
13363 group = groups[group_uuid]
13364 except KeyError:
13365 # Can't find group, let's use UUID
13366 group_name = group_uuid
13367 else:
13368 group_name = group["name"]
13370 result.add(group_name)
13372 return sorted(result)
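# Editor's note: an illustrative call of the helper above:
#   IAllocator._NodesToGroups({"node1": "uuid-a", "node2": "uuid-b"},
#                             {"uuid-a": {"name": "default"}},
#                             ["node1", "node2", "node3"])
#   => ["default", "uuid-b"]  (unknown "node3" is skipped; "uuid-b" has no
#      group entry, so the UUID itself is used as the group name)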
13375 class LUTestAllocator(NoHooksLU):
13376 """Run allocator tests.
13378 This LU runs the allocator tests
13381 def CheckPrereq(self):
13382 """Check prerequisites.
13384 This checks the opcode parameters depending on the direction and mode of the test.
13387 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13388 for attr in ["memory", "disks", "disk_template",
13389 "os", "tags", "nics", "vcpus"]:
13390 if not hasattr(self.op, attr):
13391 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13392 attr, errors.ECODE_INVAL)
13393 iname = self.cfg.ExpandInstanceName(self.op.name)
13394 if iname is not None:
13395 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13396 iname, errors.ECODE_EXISTS)
13397 if not isinstance(self.op.nics, list):
13398 raise errors.OpPrereqError("Invalid parameter 'nics'",
13399 errors.ECODE_INVAL)
13400 if not isinstance(self.op.disks, list):
13401 raise errors.OpPrereqError("Invalid parameter 'disks'",
13402 errors.ECODE_INVAL)
13403 for row in self.op.disks:
13404 if (not isinstance(row, dict) or
13405 constants.IDISK_SIZE not in row or
13406 not isinstance(row[constants.IDISK_SIZE], int) or
13407 constants.IDISK_MODE not in row or
13408 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13409 raise errors.OpPrereqError("Invalid contents of the 'disks'"
13410 " parameter", errors.ECODE_INVAL)
13411 if self.op.hypervisor is None:
13412 self.op.hypervisor = self.cfg.GetHypervisorType()
13413 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13414 fname = _ExpandInstanceName(self.cfg, self.op.name)
13415 self.op.name = fname
13416 self.relocate_from = \
13417 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
13418 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
13419 constants.IALLOCATOR_MODE_NODE_EVAC):
13420 if not self.op.instances:
13421 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
13422 self.op.instances = _GetWantedInstances(self, self.op.instances)
13423 else:
13424 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
13425 self.op.mode, errors.ECODE_INVAL)
13427 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13428 if self.op.allocator is None:
13429 raise errors.OpPrereqError("Missing allocator name",
13430 errors.ECODE_INVAL)
13431 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13432 raise errors.OpPrereqError("Wrong allocator test '%s'" %
13433 self.op.direction, errors.ECODE_INVAL)
13435 def Exec(self, feedback_fn):
13436 """Run the allocator test.
13439 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13440 ial = IAllocator(self.cfg, self.rpc,
13443 memory=self.op.memory,
13444 disks=self.op.disks,
13445 disk_template=self.op.disk_template,
13449 vcpus=self.op.vcpus,
13450 hypervisor=self.op.hypervisor,
13452 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13453 ial = IAllocator(self.cfg, self.rpc,
13456 relocate_from=list(self.relocate_from),
13458 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13459 ial = IAllocator(self.cfg, self.rpc,
13461 instances=self.op.instances,
13462 target_groups=self.op.target_groups)
13463 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13464 ial = IAllocator(self.cfg, self.rpc,
13466 instances=self.op.instances,
13467 evac_mode=self.op.evac_mode)
13468 else:
13469 raise errors.ProgrammerError("Unhandled mode '%s' in"
13470 " LUTestAllocator.Exec" % self.op.mode)
13472 if self.op.direction == constants.IALLOCATOR_DIR_IN:
13473 result = ial.in_text
13474 else:
13475 ial.Run(self.op.allocator, validate=False)
13476 result = ial.out_text
13477 return result
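# Editor's note: this LU backs the "gnt-debug allocator" command; with
# direction "in" it returns the generated allocator input text, with "out" it
# returns the raw output of the named allocator script.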
13480 #: Query type implementations
13481 _QUERY_IMPL = {
13482 constants.QR_INSTANCE: _InstanceQuery,
13483 constants.QR_NODE: _NodeQuery,
13484 constants.QR_GROUP: _GroupQuery,
13485 constants.QR_OS: _OsQuery,
13486 }
13488 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
13491 def _GetQueryImplementation(name):
13492 """Returns the implementation for a query type.
13494 @param name: Query type, must be one of L{constants.QR_VIA_OP}
13497 try:
13498 return _QUERY_IMPL[name]
13499 except KeyError:
13500 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
13501 errors.ECODE_INVAL)
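# Editor's note: illustrative use of the lookup above, e.g. from a query
# opcode handler:
#   impl = _GetQueryImplementation(constants.QR_INSTANCE)   # -> _InstanceQuery
#   # an unknown resource name raises OpPrereqError with errors.ECODE_INVAL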