code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43 import operator
  44
  45 from ganeti import ssh
  46 from ganeti import utils
  47 from ganeti import errors
  48 from ganeti import hypervisor
  49 from ganeti import locking
  50 from ganeti import constants
  51 from ganeti import objects
  52 from ganeti import serializer
  53 from ganeti import ssconf
  54 from ganeti import uidpool
  55 from ganeti import compat
  56 from ganeti import masterd
  57 from ganeti import netutils
  58 from ganeti import query
  59 from ganeti import qlang
  60 from ganeti import opcodes
  61 from ganeti import ht
  62
  63 import ganeti.masterd.instance # pylint: disable=W0611
  64
  65
  66 class ResultWithJobs:
  67   """Data container for LU results with jobs.
  68
  69   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  70   by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  71   contained in the C{jobs} attribute and include the job IDs in the opcode
  72   result.
  73
  74   """
  75   def __init__(self, jobs, **kwargs):
  76     """Initializes this class.
  77
  78     Additional return values can be specified as keyword arguments.
  79
  80     @type jobs: list of lists of L{opcode.OpCode}
  81     @param jobs: A list of lists of opcode objects
  82
  83     """
  84     self.jobs = jobs
  85     self.other = kwargs
  86
  87
  88 class LogicalUnit(object):
  89   """Logical Unit base class.
  90
  91   Subclasses must follow these rules:
  92     - implement ExpandNames
  93     - implement CheckPrereq (except when tasklets are used)
  94     - implement Exec (except when tasklets are used)
  95     - implement BuildHooksEnv
  96     - implement BuildHooksNodes
  97     - redefine HPATH and HTYPE
  98     - optionally redefine their run requirements:
  99         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 100
 101   Note that all commands require root permissions.
 102
 103   @ivar dry_run_result: the value (if any) that will be returned to the caller
 104       in dry-run mode (signalled by opcode dry_run parameter)
 105
 106   """
 107   HPATH = None
 108   HTYPE = None
 109   REQ_BGL = True
 110
 111   def __init__(self, processor, op, context, rpc):
 112     """Constructor for LogicalUnit.
 113
 114     This needs to be overridden in derived classes in order to check op
 115     validity.
 116
 117     """
 118     self.proc = processor
 119     self.op = op
 120     self.cfg = context.cfg
 121     self.glm = context.glm
 122     # readability alias
 123     self.owned_locks = context.glm.list_owned
 124     self.context = context
 125     self.rpc = rpc
 126     # Dicts used to declare locking needs to mcpu
 127     self.needed_locks = None
 128     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 129     self.add_locks = {}
 130     self.remove_locks = {}
 131     # Used to force good behavior when calling helper functions
 132     self.recalculate_locks = {}
 133     # logging
 134     self.Log = processor.Log # pylint: disable=C0103
 135     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 136     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 137     self.LogStep = processor.LogStep # pylint: disable=C0103
 138     # support for dry-run
 139     self.dry_run_result = None
 140     # support for generic debug attribute
 141     if (not hasattr(self.op, "debug_level") or
 142         not isinstance(self.op.debug_level, int)):
 143       self.op.debug_level = 0
 144
 145     # Tasklets
 146     self.tasklets = None
 147
 148     # Validate opcode parameters and set defaults
 149     self.op.Validate(True)
 150
 151     self.CheckArguments()
 152
 153   def CheckArguments(self):
 154     """Check syntactic validity for the opcode arguments.
 155
 156     This method is for doing a simple syntactic check and ensure
 157     validity of opcode parameters, without any cluster-related
 158     checks. While the same can be accomplished in ExpandNames and/or
 159     CheckPrereq, doing these separate is better because:
 160
 161       - ExpandNames is left as as purely a lock-related function
 162       - CheckPrereq is run after we have acquired locks (and possible
 163         waited for them)
 164
 165     The function is allowed to change the self.op attribute so that
 166     later methods can no longer worry about missing parameters.
 167
 168     """
 169     pass
 170
 171   def ExpandNames(self):
 172     """Expand names for this LU.
 173
 174     This method is called before starting to execute the opcode, and it should
 175     update all the parameters of the opcode to their canonical form (e.g. a
 176     short node name must be fully expanded after this method has successfully
 177     completed). This way locking, hooks, logging, etc. can work correctly.
 178
 179     LUs which implement this method must also populate the self.needed_locks
 180     member, as a dict with lock levels as keys, and a list of needed lock names
 181     as values. Rules:
 182
 183       - use an empty dict if you don't need any lock
 184       - if you don't need any lock at a particular level omit that level
 185       - don't put anything for the BGL level
 186       - if you want all locks at a level use locking.ALL_SET as a value
 187
 188     If you need to share locks (rather than acquire them exclusively) at one
 189     level you can modify self.share_locks, setting a true value (usually 1) for
 190     that level. By default locks are not shared.
 191
 192     This function can also define a list of tasklets, which then will be
 193     executed in order instead of the usual LU-level CheckPrereq and Exec
 194     functions, if those are not defined by the LU.
 195
 196     Examples::
 197
 198       # Acquire all nodes and one instance
 199       self.needed_locks = {
 200         locking.LEVEL_NODE: locking.ALL_SET,
 201         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 202       }
 203       # Acquire just two nodes
 204       self.needed_locks = {
 205         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 206       }
 207       # Acquire no locks
 208       self.needed_locks = {} # No, you can't leave it to the default value None
 209
 210     """
 211     # The implementation of this method is mandatory only if the new LU is
 212     # concurrent, so that old LUs don't need to be changed all at the same
 213     # time.
 214     if self.REQ_BGL:
 215       self.needed_locks = {} # Exclusive LUs don't need locks.
 216     else:
 217       raise NotImplementedError
 218
 219   def DeclareLocks(self, level):
 220     """Declare LU locking needs for a level
 221
 222     While most LUs can just declare their locking needs at ExpandNames time,
 223     sometimes there's the need to calculate some locks after having acquired
 224     the ones before. This function is called just before acquiring locks at a
 225     particular level, but after acquiring the ones at lower levels, and permits
 226     such calculations. It can be used to modify self.needed_locks, and by
 227     default it does nothing.
 228
 229     This function is only called if you have something already set in
 230     self.needed_locks for the level.
 231
 232     @param level: Locking level which is going to be locked
 233     @type level: member of ganeti.locking.LEVELS
 234
 235     """
 236
 237   def CheckPrereq(self):
 238     """Check prerequisites for this LU.
 239
 240     This method should check that the prerequisites for the execution
 241     of this LU are fulfilled. It can do internode communication, but
 242     it should be idempotent - no cluster or system changes are
 243     allowed.
 244
 245     The method should raise errors.OpPrereqError in case something is
 246     not fulfilled. Its return value is ignored.
 247
 248     This method should also update all the parameters of the opcode to
 249     their canonical form if it hasn't been done by ExpandNames before.
 250
 251     """
 252     if self.tasklets is not None:
 253       for (idx, tl) in enumerate(self.tasklets):
 254         logging.debug("Checking prerequisites for tasklet %s/%s",
 255                       idx + 1, len(self.tasklets))
 256         tl.CheckPrereq()
 257     else:
 258       pass
 259
 260   def Exec(self, feedback_fn):
 261     """Execute the LU.
 262
 263     This method should implement the actual work. It should raise
 264     errors.OpExecError for failures that are somewhat dealt with in
 265     code, or expected.
 266
 267     """
 268     if self.tasklets is not None:
 269       for (idx, tl) in enumerate(self.tasklets):
 270         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 271         tl.Exec(feedback_fn)
 272     else:
 273       raise NotImplementedError
 274
 275   def BuildHooksEnv(self):
 276     """Build hooks environment for this LU.
 277
 278     @rtype: dict
 279     @return: Dictionary containing the environment that will be used for
 280       running the hooks for this LU. The keys of the dict must not be prefixed
 281       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 282       will extend the environment with additional variables. If no environment
 283       should be defined, an empty dictionary should be returned (not C{None}).
 284     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 285       will not be called.
 286
 287     """
 288     raise NotImplementedError
 289
 290   def BuildHooksNodes(self):
 291     """Build list of nodes to run LU's hooks.
 292
 293     @rtype: tuple; (list, list)
 294     @return: Tuple containing a list of node names on which the hook
 295       should run before the execution and a list of node names on which the
 296       hook should run after the execution. No nodes should be returned as an
 297       empty list (and not None).
 298     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 299       will not be called.
 300
 301     """
 302     raise NotImplementedError
 303
 304   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 305     """Notify the LU about the results of its hooks.
 306
 307     This method is called every time a hooks phase is executed, and notifies
 308     the Logical Unit about the hooks' result. The LU can then use it to alter
 309     its result based on the hooks.  By default the method does nothing and the
 310     previous result is passed back unchanged but any LU can define it if it
 311     wants to use the local cluster hook-scripts somehow.
 312
 313     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 314         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 315     @param hook_results: the results of the multi-node hooks rpc call
 316     @param feedback_fn: function used send feedback back to the caller
 317     @param lu_result: the previous Exec result this LU had, or None
 318         in the PRE phase
 319     @return: the new Exec result, based on the previous result
 320         and hook results
 321
 322     """
 323     # API must be kept, thus we ignore the unused argument and could
 324     # be a function warnings
 325     # pylint: disable=W0613,R0201
 326     return lu_result
 327
 328   def _ExpandAndLockInstance(self):
 329     """Helper function to expand and lock an instance.
 330
 331     Many LUs that work on an instance take its name in self.op.instance_name
 332     and need to expand it and then declare the expanded name for locking. This
 333     function does it, and then updates self.op.instance_name to the expanded
 334     name. It also initializes needed_locks as a dict, if this hasn't been done
 335     before.
 336
 337     """
 338     if self.needed_locks is None:
 339       self.needed_locks = {}
 340     else:
 341       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 342         "_ExpandAndLockInstance called with instance-level locks set"
 343     self.op.instance_name = _ExpandInstanceName(self.cfg,
 344                                                 self.op.instance_name)
 345     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 346
 347   def _LockInstancesNodes(self, primary_only=False):
 348     """Helper function to declare instances' nodes for locking.
 349
 350     This function should be called after locking one or more instances to lock
 351     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 352     with all primary or secondary nodes for instances already locked and
 353     present in self.needed_locks[locking.LEVEL_INSTANCE].
 354
 355     It should be called from DeclareLocks, and for safety only works if
 356     self.recalculate_locks[locking.LEVEL_NODE] is set.
 357
 358     In the future it may grow parameters to just lock some instance's nodes, or
 359     to just lock primaries or secondary nodes, if needed.
 360
 361     If should be called in DeclareLocks in a way similar to::
 362
 363       if level == locking.LEVEL_NODE:
 364         self._LockInstancesNodes()
 365
 366     @type primary_only: boolean
 367     @param primary_only: only lock primary nodes of locked instances
 368
 369     """
 370     assert locking.LEVEL_NODE in self.recalculate_locks, \
 371       "_LockInstancesNodes helper function called with no nodes to recalculate"
 372
 373     # TODO: check if we're really been called with the instance locks held
 374
 375     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 376     # future we might want to have different behaviors depending on the value
 377     # of self.recalculate_locks[locking.LEVEL_NODE]
 378     wanted_nodes = []
 379     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 380     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 381       wanted_nodes.append(instance.primary_node)
 382       if not primary_only:
 383         wanted_nodes.extend(instance.secondary_nodes)
 384
 385     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 386       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 387     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 388       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 389
 390     del self.recalculate_locks[locking.LEVEL_NODE]
 391
 392
 393 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 394   """Simple LU which runs no hooks.
 395
 396   This LU is intended as a parent for other LogicalUnits which will
 397   run no hooks, in order to reduce duplicate code.
 398
 399   """
 400   HPATH = None
 401   HTYPE = None
 402
 403   def BuildHooksEnv(self):
 404     """Empty BuildHooksEnv for NoHooksLu.
 405
 406     This just raises an error.
 407
 408     """
 409     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 410
 411   def BuildHooksNodes(self):
 412     """Empty BuildHooksNodes for NoHooksLU.
 413
 414     """
 415     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 416
 417
 418 class Tasklet:
 419   """Tasklet base class.
 420
 421   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 422   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 423   tasklets know nothing about locks.
 424
 425   Subclasses must follow these rules:
 426     - Implement CheckPrereq
 427     - Implement Exec
 428
 429   """
 430   def __init__(self, lu):
 431     self.lu = lu
 432
 433     # Shortcuts
 434     self.cfg = lu.cfg
 435     self.rpc = lu.rpc
 436
 437   def CheckPrereq(self):
 438     """Check prerequisites for this tasklets.
 439
 440     This method should check whether the prerequisites for the execution of
 441     this tasklet are fulfilled. It can do internode communication, but it
 442     should be idempotent - no cluster or system changes are allowed.
 443
 444     The method should raise errors.OpPrereqError in case something is not
 445     fulfilled. Its return value is ignored.
 446
 447     This method should also update all parameters to their canonical form if it
 448     hasn't been done before.
 449
 450     """
 451     pass
 452
 453   def Exec(self, feedback_fn):
 454     """Execute the tasklet.
 455
 456     This method should implement the actual work. It should raise
 457     errors.OpExecError for failures that are somewhat dealt with in code, or
 458     expected.
 459
 460     """
 461     raise NotImplementedError
 462
 463
 464 class _QueryBase:
 465   """Base for query utility classes.
 466
 467   """
 468   #: Attribute holding field definitions
 469   FIELDS = None
 470
 471   def __init__(self, qfilter, fields, use_locking):
 472     """Initializes this class.
 473
 474     """
 475     self.use_locking = use_locking
 476
 477     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 478                              namefield="name")
 479     self.requested_data = self.query.RequestedData()
 480     self.names = self.query.RequestedNames()
 481
 482     # Sort only if no names were requested
 483     self.sort_by_name = not self.names
 484
 485     self.do_locking = None
 486     self.wanted = None
 487
 488   def _GetNames(self, lu, all_names, lock_level):
 489     """Helper function to determine names asked for in the query.
 490
 491     """
 492     if self.do_locking:
 493       names = lu.owned_locks(lock_level)
 494     else:
 495       names = all_names
 496
 497     if self.wanted == locking.ALL_SET:
 498       assert not self.names
 499       # caller didn't specify names, so ordering is not important
 500       return utils.NiceSort(names)
 501
 502     # caller specified names and we must keep the same order
 503     assert self.names
 504     assert not self.do_locking or lu.glm.is_owned(lock_level)
 505
 506     missing = set(self.wanted).difference(names)
 507     if missing:
 508       raise errors.OpExecError("Some items were removed before retrieving"
 509                                " their data: %s" % missing)
 510
 511     # Return expanded names
 512     return self.wanted
 513
 514   def ExpandNames(self, lu):
 515     """Expand names for this query.
 516
 517     See L{LogicalUnit.ExpandNames}.
 518
 519     """
 520     raise NotImplementedError()
 521
 522   def DeclareLocks(self, lu, level):
 523     """Declare locks for this query.
 524
 525     See L{LogicalUnit.DeclareLocks}.
 526
 527     """
 528     raise NotImplementedError()
 529
 530   def _GetQueryData(self, lu):
 531     """Collects all data for this query.
 532
 533     @return: Query data object
 534
 535     """
 536     raise NotImplementedError()
 537
 538   def NewStyleQuery(self, lu):
 539     """Collect data and execute query.
 540
 541     """
 542     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 543                                   sort_by_name=self.sort_by_name)
 544
 545   def OldStyleQuery(self, lu):
 546     """Collect data and execute query.
 547
 548     """
 549     return self.query.OldStyleQuery(self._GetQueryData(lu),
 550                                     sort_by_name=self.sort_by_name)
 551
 552
 553 def _ShareAll():
 554   """Returns a dict declaring all lock levels shared.
 555
 556   """
 557   return dict.fromkeys(locking.LEVELS, 1)
 558
 559
 560 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
 561   """Checks if the owned node groups are still correct for an instance.
 562
 563   @type cfg: L{config.ConfigWriter}
 564   @param cfg: The cluster configuration
 565   @type instance_name: string
 566   @param instance_name: Instance name
 567   @type owned_groups: set or frozenset
 568   @param owned_groups: List of currently owned node groups
 569
 570   """
 571   inst_groups = cfg.GetInstanceNodeGroups(instance_name)
 572
 573   if not owned_groups.issuperset(inst_groups):
 574     raise errors.OpPrereqError("Instance %s's node groups changed since"
 575                                " locks were acquired, current groups are"
 576                                " are '%s', owning groups '%s'; retry the"
 577                                " operation" %
 578                                (instance_name,
 579                                 utils.CommaJoin(inst_groups),
 580                                 utils.CommaJoin(owned_groups)),
 581                                errors.ECODE_STATE)
 582
 583   return inst_groups
 584
 585
 586 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 587   """Checks if the instances in a node group are still correct.
 588
 589   @type cfg: L{config.ConfigWriter}
 590   @param cfg: The cluster configuration
 591   @type group_uuid: string
 592   @param group_uuid: Node group UUID
 593   @type owned_instances: set or frozenset
 594   @param owned_instances: List of currently owned instances
 595
 596   """
 597   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 598   if owned_instances != wanted_instances:
 599     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 600                                " locks were acquired, wanted '%s', have '%s';"
 601                                " retry the operation" %
 602                                (group_uuid,
 603                                 utils.CommaJoin(wanted_instances),
 604                                 utils.CommaJoin(owned_instances)),
 605                                errors.ECODE_STATE)
 606
 607   return wanted_instances
 608
 609
 610 def _SupportsOob(cfg, node):
 611   """Tells if node supports OOB.
 612
 613   @type cfg: L{config.ConfigWriter}
 614   @param cfg: The cluster configuration
 615   @type node: L{objects.Node}
 616   @param node: The node
 617   @return: The OOB script if supported or an empty string otherwise
 618
 619   """
 620   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 621
 622
 623 def _GetWantedNodes(lu, nodes):
 624   """Returns list of checked and expanded node names.
 625
 626   @type lu: L{LogicalUnit}
 627   @param lu: the logical unit on whose behalf we execute
 628   @type nodes: list
 629   @param nodes: list of node names or None for all nodes
 630   @rtype: list
 631   @return: the list of nodes, sorted
 632   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 633
 634   """
 635   if nodes:
 636     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 637
 638   return utils.NiceSort(lu.cfg.GetNodeList())
 639
 640
 641 def _GetWantedInstances(lu, instances):
 642   """Returns list of checked and expanded instance names.
 643
 644   @type lu: L{LogicalUnit}
 645   @param lu: the logical unit on whose behalf we execute
 646   @type instances: list
 647   @param instances: list of instance names or None for all instances
 648   @rtype: list
 649   @return: the list of instances, sorted
 650   @raise errors.OpPrereqError: if the instances parameter is wrong type
 651   @raise errors.OpPrereqError: if any of the passed instances is not found
 652
 653   """
 654   if instances:
 655     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 656   else:
 657     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 658   return wanted
 659
 660
 661 def _GetUpdatedParams(old_params, update_dict,
 662                       use_default=True, use_none=False):
 663   """Return the new version of a parameter dictionary.
 664
 665   @type old_params: dict
 666   @param old_params: old parameters
 667   @type update_dict: dict
 668   @param update_dict: dict containing new parameter values, or
 669       constants.VALUE_DEFAULT to reset the parameter to its default
 670       value
 671   @param use_default: boolean
 672   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 673       values as 'to be deleted' values
 674   @param use_none: boolean
 675   @type use_none: whether to recognise C{None} values as 'to be
 676       deleted' values
 677   @rtype: dict
 678   @return: the new parameter dictionary
 679
 680   """
 681   params_copy = copy.deepcopy(old_params)
 682   for key, val in update_dict.iteritems():
 683     if ((use_default and val == constants.VALUE_DEFAULT) or
 684         (use_none and val is None)):
 685       try:
 686         del params_copy[key]
 687       except KeyError:
 688         pass
 689     else:
 690       params_copy[key] = val
 691   return params_copy
 692
 693
 694 def _ReleaseLocks(lu, level, names=None, keep=None):
 695   """Releases locks owned by an LU.
 696
 697   @type lu: L{LogicalUnit}
 698   @param level: Lock level
 699   @type names: list or None
 700   @param names: Names of locks to release
 701   @type keep: list or None
 702   @param keep: Names of locks to retain
 703
 704   """
 705   assert not (keep is not None and names is not None), \
 706          "Only one of the 'names' and the 'keep' parameters can be given"
 707
 708   if names is not None:
 709     should_release = names.__contains__
 710   elif keep:
 711     should_release = lambda name: name not in keep
 712   else:
 713     should_release = None
 714
 715   if should_release:
 716     retain = []
 717     release = []
 718
 719     # Determine which locks to release
 720     for name in lu.owned_locks(level):
 721       if should_release(name):
 722         release.append(name)
 723       else:
 724         retain.append(name)
 725
 726     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 727
 728     # Release just some locks
 729     lu.glm.release(level, names=release)
 730
 731     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 732   else:
 733     # Release everything
 734     lu.glm.release(level)
 735
 736     assert not lu.glm.is_owned(level), "No locks should be owned"
 737
 738
 739 def _MapInstanceDisksToNodes(instances):
 740   """Creates a map from (node, volume) to instance name.
 741
 742   @type instances: list of L{objects.Instance}
 743   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 744
 745   """
 746   return dict(((node, vol), inst.name)
 747               for inst in instances
 748               for (node, vols) in inst.MapLVsByNode().items()
 749               for vol in vols)
 750
 751
 752 def _RunPostHook(lu, node_name):
 753   """Runs the post-hook for an opcode on a single node.
 754
 755   """
 756   hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
 757   try:
 758     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 759   except:
 760     # pylint: disable=W0702
 761     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 762
 763
 764 def _CheckOutputFields(static, dynamic, selected):
 765   """Checks whether all selected fields are valid.
 766
 767   @type static: L{utils.FieldSet}
 768   @param static: static fields set
 769   @type dynamic: L{utils.FieldSet}
 770   @param dynamic: dynamic fields set
 771
 772   """
 773   f = utils.FieldSet()
 774   f.Extend(static)
 775   f.Extend(dynamic)
 776
 777   delta = f.NonMatching(selected)
 778   if delta:
 779     raise errors.OpPrereqError("Unknown output fields selected: %s"
 780                                % ",".join(delta), errors.ECODE_INVAL)
 781
 782
 783 def _CheckGlobalHvParams(params):
 784   """Validates that given hypervisor params are not global ones.
 785
 786   This will ensure that instances don't get customised versions of
 787   global params.
 788
 789   """
 790   used_globals = constants.HVC_GLOBALS.intersection(params)
 791   if used_globals:
 792     msg = ("The following hypervisor parameters are global and cannot"
 793            " be customized at instance level, please modify them at"
 794            " cluster level: %s" % utils.CommaJoin(used_globals))
 795     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 796
 797
 798 def _CheckNodeOnline(lu, node, msg=None):
 799   """Ensure that a given node is online.
 800
 801   @param lu: the LU on behalf of which we make the check
 802   @param node: the node to check
 803   @param msg: if passed, should be a message to replace the default one
 804   @raise errors.OpPrereqError: if the node is offline
 805
 806   """
 807   if msg is None:
 808     msg = "Can't use offline node"
 809   if lu.cfg.GetNodeInfo(node).offline:
 810     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 811
 812
 813 def _CheckNodeNotDrained(lu, node):
 814   """Ensure that a given node is not drained.
 815
 816   @param lu: the LU on behalf of which we make the check
 817   @param node: the node to check
 818   @raise errors.OpPrereqError: if the node is drained
 819
 820   """
 821   if lu.cfg.GetNodeInfo(node).drained:
 822     raise errors.OpPrereqError("Can't use drained node %s" % node,
 823                                errors.ECODE_STATE)
 824
 825
 826 def _CheckNodeVmCapable(lu, node):
 827   """Ensure that a given node is vm capable.
 828
 829   @param lu: the LU on behalf of which we make the check
 830   @param node: the node to check
 831   @raise errors.OpPrereqError: if the node is not vm capable
 832
 833   """
 834   if not lu.cfg.GetNodeInfo(node).vm_capable:
 835     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 836                                errors.ECODE_STATE)
 837
 838
 839 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 840   """Ensure that a node supports a given OS.
 841
 842   @param lu: the LU on behalf of which we make the check
 843   @param node: the node to check
 844   @param os_name: the OS to query about
 845   @param force_variant: whether to ignore variant errors
 846   @raise errors.OpPrereqError: if the node is not supporting the OS
 847
 848   """
 849   result = lu.rpc.call_os_get(node, os_name)
 850   result.Raise("OS '%s' not in supported OS list for node %s" %
 851                (os_name, node),
 852                prereq=True, ecode=errors.ECODE_INVAL)
 853   if not force_variant:
 854     _CheckOSVariant(result.payload, os_name)
 855
 856
 857 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 858   """Ensure that a node has the given secondary ip.
 859
 860   @type lu: L{LogicalUnit}
 861   @param lu: the LU on behalf of which we make the check
 862   @type node: string
 863   @param node: the node to check
 864   @type secondary_ip: string
 865   @param secondary_ip: the ip to check
 866   @type prereq: boolean
 867   @param prereq: whether to throw a prerequisite or an execute error
 868   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 869   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 870
 871   """
 872   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 873   result.Raise("Failure checking secondary ip on node %s" % node,
 874                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 875   if not result.payload:
 876     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 877            " please fix and re-run this command" % secondary_ip)
 878     if prereq:
 879       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 880     else:
 881       raise errors.OpExecError(msg)
 882
 883
 884 def _GetClusterDomainSecret():
 885   """Reads the cluster domain secret.
 886
 887   """
 888   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 889                                strict=True)
 890
 891
 892 def _CheckInstanceDown(lu, instance, reason):
 893   """Ensure that an instance is not running."""
 894   if instance.admin_up:
 895     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 896                                (instance.name, reason), errors.ECODE_STATE)
 897
 898   pnode = instance.primary_node
 899   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 900   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 901               prereq=True, ecode=errors.ECODE_ENVIRON)
 902
 903   if instance.name in ins_l.payload:
 904     raise errors.OpPrereqError("Instance %s is running, %s" %
 905                                (instance.name, reason), errors.ECODE_STATE)
 906
 907
 908 def _ExpandItemName(fn, name, kind):
 909   """Expand an item name.
 910
 911   @param fn: the function to use for expansion
 912   @param name: requested item name
 913   @param kind: text description ('Node' or 'Instance')
 914   @return: the resolved (full) name
 915   @raise errors.OpPrereqError: if the item is not found
 916
 917   """
 918   full_name = fn(name)
 919   if full_name is None:
 920     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 921                                errors.ECODE_NOENT)
 922   return full_name
 923
 924
 925 def _ExpandNodeName(cfg, name):
 926   """Wrapper over L{_ExpandItemName} for nodes."""
 927   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 928
 929
 930 def _ExpandInstanceName(cfg, name):
 931   """Wrapper over L{_ExpandItemName} for instance."""
 932   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 933
 934
 935 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 936                           memory, vcpus, nics, disk_template, disks,
 937                           bep, hvp, hypervisor_name, tags):
 938   """Builds instance related env variables for hooks
 939
 940   This builds the hook environment from individual variables.
 941
 942   @type name: string
 943   @param name: the name of the instance
 944   @type primary_node: string
 945   @param primary_node: the name of the instance's primary node
 946   @type secondary_nodes: list
 947   @param secondary_nodes: list of secondary nodes as strings
 948   @type os_type: string
 949   @param os_type: the name of the instance's OS
 950   @type status: boolean
 951   @param status: the should_run status of the instance
 952   @type memory: string
 953   @param memory: the memory size of the instance
 954   @type vcpus: string
 955   @param vcpus: the count of VCPUs the instance has
 956   @type nics: list
 957   @param nics: list of tuples (ip, mac, mode, link) representing
 958       the NICs the instance has
 959   @type disk_template: string
 960   @param disk_template: the disk template of the instance
 961   @type disks: list
 962   @param disks: the list of (size, mode) pairs
 963   @type bep: dict
 964   @param bep: the backend parameters for the instance
 965   @type hvp: dict
 966   @param hvp: the hypervisor parameters for the instance
 967   @type hypervisor_name: string
 968   @param hypervisor_name: the hypervisor for the instance
 969   @type tags: list
 970   @param tags: list of instance tags as strings
 971   @rtype: dict
 972   @return: the hook environment for this instance
 973
 974   """
 975   if status:
 976     str_status = "up"
 977   else:
 978     str_status = "down"
 979   env = {
 980     "OP_TARGET": name,
 981     "INSTANCE_NAME": name,
 982     "INSTANCE_PRIMARY": primary_node,
 983     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 984     "INSTANCE_OS_TYPE": os_type,
 985     "INSTANCE_STATUS": str_status,
 986     "INSTANCE_MEMORY": memory,
 987     "INSTANCE_VCPUS": vcpus,
 988     "INSTANCE_DISK_TEMPLATE": disk_template,
 989     "INSTANCE_HYPERVISOR": hypervisor_name,
 990   }
 991
 992   if nics:
 993     nic_count = len(nics)
 994     for idx, (ip, mac, mode, link) in enumerate(nics):
 995       if ip is None:
 996         ip = ""
 997       env["INSTANCE_NIC%d_IP" % idx] = ip
 998       env["INSTANCE_NIC%d_MAC" % idx] = mac
 999       env["INSTANCE_NIC%d_MODE" % idx] = mode
1000       env["INSTANCE_NIC%d_LINK" % idx] = link
1001       if mode == constants.NIC_MODE_BRIDGED:
1002         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1003   else:
1004     nic_count = 0
1005
1006   env["INSTANCE_NIC_COUNT"] = nic_count
1007
1008   if disks:
1009     disk_count = len(disks)
1010     for idx, (size, mode) in enumerate(disks):
1011       env["INSTANCE_DISK%d_SIZE" % idx] = size
1012       env["INSTANCE_DISK%d_MODE" % idx] = mode
1013   else:
1014     disk_count = 0
1015
1016   env["INSTANCE_DISK_COUNT"] = disk_count
1017
1018   if not tags:
1019     tags = []
1020
1021   env["INSTANCE_TAGS"] = " ".join(tags)
1022
1023   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1024     for key, value in source.items():
1025       env["INSTANCE_%s_%s" % (kind, key)] = value
1026
1027   return env
1028
1029
1030 def _NICListToTuple(lu, nics):
1031   """Build a list of nic information tuples.
1032
1033   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1034   value in LUInstanceQueryData.
1035
1036   @type lu:  L{LogicalUnit}
1037   @param lu: the logical unit on whose behalf we execute
1038   @type nics: list of L{objects.NIC}
1039   @param nics: list of nics to convert to hooks tuples
1040
1041   """
1042   hooks_nics = []
1043   cluster = lu.cfg.GetClusterInfo()
1044   for nic in nics:
1045     ip = nic.ip
1046     mac = nic.mac
1047     filled_params = cluster.SimpleFillNIC(nic.nicparams)
1048     mode = filled_params[constants.NIC_MODE]
1049     link = filled_params[constants.NIC_LINK]
1050     hooks_nics.append((ip, mac, mode, link))
1051   return hooks_nics
1052
1053
1054 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1055   """Builds instance related env variables for hooks from an object.
1056
1057   @type lu: L{LogicalUnit}
1058   @param lu: the logical unit on whose behalf we execute
1059   @type instance: L{objects.Instance}
1060   @param instance: the instance for which we should build the
1061       environment
1062   @type override: dict
1063   @param override: dictionary with key/values that will override
1064       our values
1065   @rtype: dict
1066   @return: the hook environment dictionary
1067
1068   """
1069   cluster = lu.cfg.GetClusterInfo()
1070   bep = cluster.FillBE(instance)
1071   hvp = cluster.FillHV(instance)
1072   args = {
1073     "name": instance.name,
1074     "primary_node": instance.primary_node,
1075     "secondary_nodes": instance.secondary_nodes,
1076     "os_type": instance.os,
1077     "status": instance.admin_up,
1078     "memory": bep[constants.BE_MEMORY],
1079     "vcpus": bep[constants.BE_VCPUS],
1080     "nics": _NICListToTuple(lu, instance.nics),
1081     "disk_template": instance.disk_template,
1082     "disks": [(disk.size, disk.mode) for disk in instance.disks],
1083     "bep": bep,
1084     "hvp": hvp,
1085     "hypervisor_name": instance.hypervisor,
1086     "tags": instance.tags,
1087   }
1088   if override:
1089     args.update(override)
1090   return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1091
1092
1093 def _AdjustCandidatePool(lu, exceptions):
1094   """Adjust the candidate pool after node operations.
1095
1096   """
1097   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1098   if mod_list:
1099     lu.LogInfo("Promoted nodes to master candidate role: %s",
1100                utils.CommaJoin(node.name for node in mod_list))
1101     for name in mod_list:
1102       lu.context.ReaddNode(name)
1103   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1104   if mc_now > mc_max:
1105     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1106                (mc_now, mc_max))
1107
1108
1109 def _DecideSelfPromotion(lu, exceptions=None):
1110   """Decide whether I should promote myself as a master candidate.
1111
1112   """
1113   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1114   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1115   # the new node will increase mc_max with one, so:
1116   mc_should = min(mc_should + 1, cp_size)
1117   return mc_now < mc_should
1118
1119
1120 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1121   """Check that the brigdes needed by a list of nics exist.
1122
1123   """
1124   cluster = lu.cfg.GetClusterInfo()
1125   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1126   brlist = [params[constants.NIC_LINK] for params in paramslist
1127             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1128   if brlist:
1129     result = lu.rpc.call_bridges_exist(target_node, brlist)
1130     result.Raise("Error checking bridges on destination node '%s'" %
1131                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1132
1133
1134 def _CheckInstanceBridgesExist(lu, instance, node=None):
1135   """Check that the brigdes needed by an instance exist.
1136
1137   """
1138   if node is None:
1139     node = instance.primary_node
1140   _CheckNicsBridgesExist(lu, instance.nics, node)
1141
1142
1143 def _CheckOSVariant(os_obj, name):
1144   """Check whether an OS name conforms to the os variants specification.
1145
1146   @type os_obj: L{objects.OS}
1147   @param os_obj: OS object to check
1148   @type name: string
1149   @param name: OS name passed by the user, to check for validity
1150
1151   """
1152   variant = objects.OS.GetVariant(name)
1153   if not os_obj.supported_variants:
1154     if variant:
1155       raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1156                                  " passed)" % (os_obj.name, variant),
1157                                  errors.ECODE_INVAL)
1158     return
1159   if not variant:
1160     raise errors.OpPrereqError("OS name must include a variant",
1161                                errors.ECODE_INVAL)
1162
1163   if variant not in os_obj.supported_variants:
1164     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1165
1166
1167 def _GetNodeInstancesInner(cfg, fn):
1168   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1169
1170
1171 def _GetNodeInstances(cfg, node_name):
1172   """Returns a list of all primary and secondary instances on a node.
1173
1174   """
1175
1176   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1177
1178
1179 def _GetNodePrimaryInstances(cfg, node_name):
1180   """Returns primary instances on a node.
1181
1182   """
1183   return _GetNodeInstancesInner(cfg,
1184                                 lambda inst: node_name == inst.primary_node)
1185
1186
1187 def _GetNodeSecondaryInstances(cfg, node_name):
1188   """Returns secondary instances on a node.
1189
1190   """
1191   return _GetNodeInstancesInner(cfg,
1192                                 lambda inst: node_name in inst.secondary_nodes)
1193
1194
1195 def _GetStorageTypeArgs(cfg, storage_type):
1196   """Returns the arguments for a storage type.
1197
1198   """
1199   # Special case for file storage
1200   if storage_type == constants.ST_FILE:
1201     # storage.FileStorage wants a list of storage directories
1202     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1203
1204   return []
1205
1206
1207 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1208   faulty = []
1209
1210   for dev in instance.disks:
1211     cfg.SetDiskID(dev, node_name)
1212
1213   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1214   result.Raise("Failed to get disk status from node %s" % node_name,
1215                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1216
1217   for idx, bdev_status in enumerate(result.payload):
1218     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1219       faulty.append(idx)
1220
1221   return faulty
1222
1223
1224 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1225   """Check the sanity of iallocator and node arguments and use the
1226   cluster-wide iallocator if appropriate.
1227
1228   Check that at most one of (iallocator, node) is specified. If none is
1229   specified, then the LU's opcode's iallocator slot is filled with the
1230   cluster-wide default iallocator.
1231
1232   @type iallocator_slot: string
1233   @param iallocator_slot: the name of the opcode iallocator slot
1234   @type node_slot: string
1235   @param node_slot: the name of the opcode target node slot
1236
1237   """
1238   node = getattr(lu.op, node_slot, None)
1239   iallocator = getattr(lu.op, iallocator_slot, None)
1240
1241   if node is not None and iallocator is not None:
1242     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1243                                errors.ECODE_INVAL)
1244   elif node is None and iallocator is None:
1245     default_iallocator = lu.cfg.GetDefaultIAllocator()
1246     if default_iallocator:
1247       setattr(lu.op, iallocator_slot, default_iallocator)
1248     else:
1249       raise errors.OpPrereqError("No iallocator or node given and no"
1250                                  " cluster-wide default iallocator found;"
1251                                  " please specify either an iallocator or a"
1252                                  " node, or set a cluster-wide default"
1253                                  " iallocator")
1254
1255
1256 def _GetDefaultIAllocator(cfg, iallocator):
1257   """Decides on which iallocator to use.
1258
1259   @type cfg: L{config.ConfigWriter}
1260   @param cfg: Cluster configuration object
1261   @type iallocator: string or None
1262   @param iallocator: Iallocator specified in opcode
1263   @rtype: string
1264   @return: Iallocator name
1265
1266   """
1267   if not iallocator:
1268     # Use default iallocator
1269     iallocator = cfg.GetDefaultIAllocator()
1270
1271   if not iallocator:
1272     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1273                                " opcode nor as a cluster-wide default",
1274                                errors.ECODE_INVAL)
1275
1276   return iallocator
1277
1278
1279 class LUClusterPostInit(LogicalUnit):
1280   """Logical unit for running hooks after cluster initialization.
1281
1282   """
1283   HPATH = "cluster-init"
1284   HTYPE = constants.HTYPE_CLUSTER
1285
1286   def BuildHooksEnv(self):
1287     """Build hooks env.
1288
1289     """
1290     return {
1291       "OP_TARGET": self.cfg.GetClusterName(),
1292       }
1293
1294   def BuildHooksNodes(self):
1295     """Build hooks nodes.
1296
1297     """
1298     return ([], [self.cfg.GetMasterNode()])
1299
1300   def Exec(self, feedback_fn):
1301     """Nothing to do.
1302
1303     """
1304     return True
1305
1306
1307 class LUClusterDestroy(LogicalUnit):
1308   """Logical unit for destroying the cluster.
1309
1310   """
1311   HPATH = "cluster-destroy"
1312   HTYPE = constants.HTYPE_CLUSTER
1313
1314   def BuildHooksEnv(self):
1315     """Build hooks env.
1316
1317     """
1318     return {
1319       "OP_TARGET": self.cfg.GetClusterName(),
1320       }
1321
1322   def BuildHooksNodes(self):
1323     """Build hooks nodes.
1324
1325     """
1326     return ([], [])
1327
1328   def CheckPrereq(self):
1329     """Check prerequisites.
1330
1331     This checks whether the cluster is empty.
1332
1333     Any errors are signaled by raising errors.OpPrereqError.
1334
1335     """
1336     master = self.cfg.GetMasterNode()
1337
1338     nodelist = self.cfg.GetNodeList()
1339     if len(nodelist) != 1 or nodelist[0] != master:
1340       raise errors.OpPrereqError("There are still %d node(s) in"
1341                                  " this cluster." % (len(nodelist) - 1),
1342                                  errors.ECODE_INVAL)
1343     instancelist = self.cfg.GetInstanceList()
1344     if instancelist:
1345       raise errors.OpPrereqError("There are still %d instance(s) in"
1346                                  " this cluster." % len(instancelist),
1347                                  errors.ECODE_INVAL)
1348
1349   def Exec(self, feedback_fn):
1350     """Destroys the cluster.
1351
1352     """
1353     master = self.cfg.GetMasterNode()
1354
1355     # Run post hooks on master node before it's removed
1356     _RunPostHook(self, master)
1357
1358     result = self.rpc.call_node_deactivate_master_ip(master)
1359     result.Raise("Could not disable the master role")
1360
1361     return master
1362
1363
1364 def _VerifyCertificate(filename):
1365   """Verifies a certificate for L{LUClusterVerifyConfig}.
1366
1367   @type filename: string
1368   @param filename: Path to PEM file
1369
1370   """
1371   try:
1372     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1373                                            utils.ReadFile(filename))
1374   except Exception, err: # pylint: disable=W0703
1375     return (LUClusterVerifyConfig.ETYPE_ERROR,
1376             "Failed to load X509 certificate %s: %s" % (filename, err))
1377
1378   (errcode, msg) = \
1379     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1380                                 constants.SSL_CERT_EXPIRATION_ERROR)
1381
1382   if msg:
1383     fnamemsg = "While verifying %s: %s" % (filename, msg)
1384   else:
1385     fnamemsg = None
1386
1387   if errcode is None:
1388     return (None, fnamemsg)
1389   elif errcode == utils.CERT_WARNING:
1390     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1391   elif errcode == utils.CERT_ERROR:
1392     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1393
1394   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1395
1396
1397 def _GetAllHypervisorParameters(cluster, instances):
1398   """Compute the set of all hypervisor parameters.
1399
1400   @type cluster: L{objects.Cluster}
1401   @param cluster: the cluster object
1402   @param instances: list of L{objects.Instance}
1403   @param instances: additional instances from which to obtain parameters
1404   @rtype: list of (origin, hypervisor, parameters)
1405   @return: a list with all parameters found, indicating the hypervisor they
1406        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1407
1408   """
1409   hvp_data = []
1410
1411   for hv_name in cluster.enabled_hypervisors:
1412     hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1413
1414   for os_name, os_hvp in cluster.os_hvp.items():
1415     for hv_name, hv_params in os_hvp.items():
1416       if hv_params:
1417         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1418         hvp_data.append(("os %s" % os_name, hv_name, full_params))
1419
1420   # TODO: collapse identical parameter values in a single one
1421   for instance in instances:
1422     if instance.hvparams:
1423       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1424                        cluster.FillHV(instance)))
1425
1426   return hvp_data
1427
1428
1429 class _VerifyErrors(object):
1430   """Mix-in for cluster/group verify LUs.
1431
1432   It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1433   self.op and self._feedback_fn to be available.)
1434
1435   """
1436
1437   ETYPE_FIELD = "code"
1438   ETYPE_ERROR = "ERROR"
1439   ETYPE_WARNING = "WARNING"
1440
1441   def _Error(self, ecode, item, msg, *args, **kwargs):
1442     """Format an error message.
1443
1444     Based on the opcode's error_codes parameter, either format a
1445     parseable error code, or a simpler error string.
1446
1447     This must be called only from Exec and functions called from Exec.
1448
1449     """
1450     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1451     itype, etxt, _ = ecode
1452     # first complete the msg
1453     if args:
1454       msg = msg % args
1455     # then format the whole message
1456     if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1457       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1458     else:
1459       if item:
1460         item = " " + item
1461       else:
1462         item = ""
1463       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1464     # and finally report it via the feedback_fn
1465     self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1466
1467   def _ErrorIf(self, cond, ecode, *args, **kwargs):
1468     """Log an error message if the passed condition is True.
1469
1470     """
1471     cond = (bool(cond)
1472             or self.op.debug_simulate_errors) # pylint: disable=E1101
1473
1474     # If the error code is in the list of ignored errors, demote the error to a
1475     # warning
1476     (_, etxt, _) = ecode
1477     if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1478       kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1479
1480     if cond:
1481       self._Error(ecode, *args, **kwargs)
1482
1483     # do not mark the operation as failed for WARN cases only
1484     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1485       self.bad = self.bad or cond
1486
1487
1488 class LUClusterVerify(NoHooksLU):
1489   """Submits all jobs necessary to verify the cluster.
1490
1491   """
1492   REQ_BGL = False
1493
1494   def ExpandNames(self):
1495     self.needed_locks = {}
1496
1497   def Exec(self, feedback_fn):
1498     jobs = []
1499
1500     if self.op.group_name:
1501       groups = [self.op.group_name]
1502       depends_fn = lambda: None
1503     else:
1504       groups = self.cfg.GetNodeGroupList()
1505
1506       # Verify global configuration
1507       jobs.append([
1508         opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1509         ])
1510
1511       # Always depend on global verification
1512       depends_fn = lambda: [(-len(jobs), [])]
1513
1514     jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1515                                             ignore_errors=self.op.ignore_errors,
1516                                             depends=depends_fn())]
1517                 for group in groups)
1518
1519     # Fix up all parameters
1520     for op in itertools.chain(*jobs): # pylint: disable=W0142
1521       op.debug_simulate_errors = self.op.debug_simulate_errors
1522       op.verbose = self.op.verbose
1523       op.error_codes = self.op.error_codes
1524       try:
1525         op.skip_checks = self.op.skip_checks
1526       except AttributeError:
1527         assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1528
1529     return ResultWithJobs(jobs)
1530
1531
1532 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1533   """Verifies the cluster config.
1534
1535   """
1536   REQ_BGL = True
1537
1538   def _VerifyHVP(self, hvp_data):
1539     """Verifies locally the syntax of the hypervisor parameters.
1540
1541     """
1542     for item, hv_name, hv_params in hvp_data:
1543       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1544              (item, hv_name))
1545       try:
1546         hv_class = hypervisor.GetHypervisor(hv_name)
1547         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1548         hv_class.CheckParameterSyntax(hv_params)
1549       except errors.GenericError, err:
1550         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1551
1552   def ExpandNames(self):
1553     # Information can be safely retrieved as the BGL is acquired in exclusive
1554     # mode
1555     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1556     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1557     self.all_node_info = self.cfg.GetAllNodesInfo()
1558     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1559     self.needed_locks = {}
1560
1561   def Exec(self, feedback_fn):
1562     """Verify integrity of cluster, performing various test on nodes.
1563
1564     """
1565     self.bad = False
1566     self._feedback_fn = feedback_fn
1567
1568     feedback_fn("* Verifying cluster config")
1569
1570     for msg in self.cfg.VerifyConfig():
1571       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1572
1573     feedback_fn("* Verifying cluster certificate files")
1574
1575     for cert_filename in constants.ALL_CERT_FILES:
1576       (errcode, msg) = _VerifyCertificate(cert_filename)
1577       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1578
1579     feedback_fn("* Verifying hypervisor parameters")
1580
1581     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1582                                                 self.all_inst_info.values()))
1583
1584     feedback_fn("* Verifying all nodes belong to an existing group")
1585
1586     # We do this verification here because, should this bogus circumstance
1587     # occur, it would never be caught by VerifyGroup, which only acts on
1588     # nodes/instances reachable from existing node groups.
1589
1590     dangling_nodes = set(node.name for node in self.all_node_info.values()
1591                          if node.group not in self.all_group_info)
1592
1593     dangling_instances = {}
1594     no_node_instances = []
1595
1596     for inst in self.all_inst_info.values():
1597       if inst.primary_node in dangling_nodes:
1598         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1599       elif inst.primary_node not in self.all_node_info:
1600         no_node_instances.append(inst.name)
1601
1602     pretty_dangling = [
1603         "%s (%s)" %
1604         (node.name,
1605          utils.CommaJoin(dangling_instances.get(node.name,
1606                                                 ["no instances"])))
1607         for node in dangling_nodes]
1608
1609     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1610                   None,
1611                   "the following nodes (and their instances) belong to a non"
1612                   " existing group: %s", utils.CommaJoin(pretty_dangling))
1613
1614     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1615                   None,
1616                   "the following instances have a non-existing primary-node:"
1617                   " %s", utils.CommaJoin(no_node_instances))
1618
1619     return not self.bad
1620
1621
1622 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1623   """Verifies the status of a node group.
1624
1625   """
1626   HPATH = "cluster-verify"
1627   HTYPE = constants.HTYPE_CLUSTER
1628   REQ_BGL = False
1629
1630   _HOOKS_INDENT_RE = re.compile("^", re.M)
1631
1632   class NodeImage(object):
1633     """A class representing the logical and physical status of a node.
1634
1635     @type name: string
1636     @ivar name: the node name to which this object refers
1637     @ivar volumes: a structure as returned from
1638         L{ganeti.backend.GetVolumeList} (runtime)
1639     @ivar instances: a list of running instances (runtime)
1640     @ivar pinst: list of configured primary instances (config)
1641     @ivar sinst: list of configured secondary instances (config)
1642     @ivar sbp: dictionary of {primary-node: list of instances} for all
1643         instances for which this node is secondary (config)
1644     @ivar mfree: free memory, as reported by hypervisor (runtime)
1645     @ivar dfree: free disk, as reported by the node (runtime)
1646     @ivar offline: the offline status (config)
1647     @type rpc_fail: boolean
1648     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1649         not whether the individual keys were correct) (runtime)
1650     @type lvm_fail: boolean
1651     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1652     @type hyp_fail: boolean
1653     @ivar hyp_fail: whether the RPC call didn't return the instance list
1654     @type ghost: boolean
1655     @ivar ghost: whether this is a known node or not (config)
1656     @type os_fail: boolean
1657     @ivar os_fail: whether the RPC call didn't return valid OS data
1658     @type oslist: list
1659     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1660     @type vm_capable: boolean
1661     @ivar vm_capable: whether the node can host instances
1662
1663     """
1664     def __init__(self, offline=False, name=None, vm_capable=True):
1665       self.name = name
1666       self.volumes = {}
1667       self.instances = []
1668       self.pinst = []
1669       self.sinst = []
1670       self.sbp = {}
1671       self.mfree = 0
1672       self.dfree = 0
1673       self.offline = offline
1674       self.vm_capable = vm_capable
1675       self.rpc_fail = False
1676       self.lvm_fail = False
1677       self.hyp_fail = False
1678       self.ghost = False
1679       self.os_fail = False
1680       self.oslist = {}
1681
1682   def ExpandNames(self):
1683     # This raises errors.OpPrereqError on its own:
1684     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1685
1686     # Get instances in node group; this is unsafe and needs verification later
1687     inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1688
1689     self.needed_locks = {
1690       locking.LEVEL_INSTANCE: inst_names,
1691       locking.LEVEL_NODEGROUP: [self.group_uuid],
1692       locking.LEVEL_NODE: [],
1693       }
1694
1695     self.share_locks = _ShareAll()
1696
1697   def DeclareLocks(self, level):
1698     if level == locking.LEVEL_NODE:
1699       # Get members of node group; this is unsafe and needs verification later
1700       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1701
1702       all_inst_info = self.cfg.GetAllInstancesInfo()
1703
1704       # In Exec(), we warn about mirrored instances that have primary and
1705       # secondary living in separate node groups. To fully verify that
1706       # volumes for these instances are healthy, we will need to do an
1707       # extra call to their secondaries. We ensure here those nodes will
1708       # be locked.
1709       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1710         # Important: access only the instances whose lock is owned
1711         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1712           nodes.update(all_inst_info[inst].secondary_nodes)
1713
1714       self.needed_locks[locking.LEVEL_NODE] = nodes
1715
1716   def CheckPrereq(self):
1717     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1718     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1719
1720     group_nodes = set(self.group_info.members)
1721     group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1722
1723     unlocked_nodes = \
1724         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1725
1726     unlocked_instances = \
1727         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1728
1729     if unlocked_nodes:
1730       raise errors.OpPrereqError("Missing lock for nodes: %s" %
1731                                  utils.CommaJoin(unlocked_nodes))
1732
1733     if unlocked_instances:
1734       raise errors.OpPrereqError("Missing lock for instances: %s" %
1735                                  utils.CommaJoin(unlocked_instances))
1736
1737     self.all_node_info = self.cfg.GetAllNodesInfo()
1738     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1739
1740     self.my_node_names = utils.NiceSort(group_nodes)
1741     self.my_inst_names = utils.NiceSort(group_instances)
1742
1743     self.my_node_info = dict((name, self.all_node_info[name])
1744                              for name in self.my_node_names)
1745
1746     self.my_inst_info = dict((name, self.all_inst_info[name])
1747                              for name in self.my_inst_names)
1748
1749     # We detect here the nodes that will need the extra RPC calls for verifying
1750     # split LV volumes; they should be locked.
1751     extra_lv_nodes = set()
1752
1753     for inst in self.my_inst_info.values():
1754       if inst.disk_template in constants.DTS_INT_MIRROR:
1755         group = self.my_node_info[inst.primary_node].group
1756         for nname in inst.secondary_nodes:
1757           if self.all_node_info[nname].group != group:
1758             extra_lv_nodes.add(nname)
1759
1760     unlocked_lv_nodes = \
1761         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1762
1763     if unlocked_lv_nodes:
1764       raise errors.OpPrereqError("these nodes could be locked: %s" %
1765                                  utils.CommaJoin(unlocked_lv_nodes))
1766     self.extra_lv_nodes = list(extra_lv_nodes)
1767
1768   def _VerifyNode(self, ninfo, nresult):
1769     """Perform some basic validation on data returned from a node.
1770
1771       - check the result data structure is well formed and has all the
1772         mandatory fields
1773       - check ganeti version
1774
1775     @type ninfo: L{objects.Node}
1776     @param ninfo: the node to check
1777     @param nresult: the results from the node
1778     @rtype: boolean
1779     @return: whether overall this call was successful (and we can expect
1780          reasonable values in the respose)
1781
1782     """
1783     node = ninfo.name
1784     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1785
1786     # main result, nresult should be a non-empty dict
1787     test = not nresult or not isinstance(nresult, dict)
1788     _ErrorIf(test, constants.CV_ENODERPC, node,
1789                   "unable to verify node: no data returned")
1790     if test:
1791       return False
1792
1793     # compares ganeti version
1794     local_version = constants.PROTOCOL_VERSION
1795     remote_version = nresult.get("version", None)
1796     test = not (remote_version and
1797                 isinstance(remote_version, (list, tuple)) and
1798                 len(remote_version) == 2)
1799     _ErrorIf(test, constants.CV_ENODERPC, node,
1800              "connection to node returned invalid data")
1801     if test:
1802       return False
1803
1804     test = local_version != remote_version[0]
1805     _ErrorIf(test, constants.CV_ENODEVERSION, node,
1806              "incompatible protocol versions: master %s,"
1807              " node %s", local_version, remote_version[0])
1808     if test:
1809       return False
1810
1811     # node seems compatible, we can actually try to look into its results
1812
1813     # full package version
1814     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1815                   constants.CV_ENODEVERSION, node,
1816                   "software version mismatch: master %s, node %s",
1817                   constants.RELEASE_VERSION, remote_version[1],
1818                   code=self.ETYPE_WARNING)
1819
1820     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1821     if ninfo.vm_capable and isinstance(hyp_result, dict):
1822       for hv_name, hv_result in hyp_result.iteritems():
1823         test = hv_result is not None
1824         _ErrorIf(test, constants.CV_ENODEHV, node,
1825                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1826
1827     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1828     if ninfo.vm_capable and isinstance(hvp_result, list):
1829       for item, hv_name, hv_result in hvp_result:
1830         _ErrorIf(True, constants.CV_ENODEHV, node,
1831                  "hypervisor %s parameter verify failure (source %s): %s",
1832                  hv_name, item, hv_result)
1833
1834     test = nresult.get(constants.NV_NODESETUP,
1835                        ["Missing NODESETUP results"])
1836     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1837              "; ".join(test))
1838
1839     return True
1840
1841   def _VerifyNodeTime(self, ninfo, nresult,
1842                       nvinfo_starttime, nvinfo_endtime):
1843     """Check the node time.
1844
1845     @type ninfo: L{objects.Node}
1846     @param ninfo: the node to check
1847     @param nresult: the remote results for the node
1848     @param nvinfo_starttime: the start time of the RPC call
1849     @param nvinfo_endtime: the end time of the RPC call
1850
1851     """
1852     node = ninfo.name
1853     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1854
1855     ntime = nresult.get(constants.NV_TIME, None)
1856     try:
1857       ntime_merged = utils.MergeTime(ntime)
1858     except (ValueError, TypeError):
1859       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1860       return
1861
1862     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1863       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1864     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1865       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1866     else:
1867       ntime_diff = None
1868
1869     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1870              "Node time diverges by at least %s from master node time",
1871              ntime_diff)
1872
1873   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1874     """Check the node LVM results.
1875
1876     @type ninfo: L{objects.Node}
1877     @param ninfo: the node to check
1878     @param nresult: the remote results for the node
1879     @param vg_name: the configured VG name
1880
1881     """
1882     if vg_name is None:
1883       return
1884
1885     node = ninfo.name
1886     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1887
1888     # checks vg existence and size > 20G
1889     vglist = nresult.get(constants.NV_VGLIST, None)
1890     test = not vglist
1891     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
1892     if not test:
1893       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1894                                             constants.MIN_VG_SIZE)
1895       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1896
1897     # check pv names
1898     pvlist = nresult.get(constants.NV_PVLIST, None)
1899     test = pvlist is None
1900     _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
1901     if not test:
1902       # check that ':' is not present in PV names, since it's a
1903       # special character for lvcreate (denotes the range of PEs to
1904       # use on the PV)
1905       for _, pvname, owner_vg in pvlist:
1906         test = ":" in pvname
1907         _ErrorIf(test, constants.CV_ENODELVM, node,
1908                  "Invalid character ':' in PV '%s' of VG '%s'",
1909                  pvname, owner_vg)
1910
1911   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1912     """Check the node bridges.
1913
1914     @type ninfo: L{objects.Node}
1915     @param ninfo: the node to check
1916     @param nresult: the remote results for the node
1917     @param bridges: the expected list of bridges
1918
1919     """
1920     if not bridges:
1921       return
1922
1923     node = ninfo.name
1924     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1925
1926     missing = nresult.get(constants.NV_BRIDGES, None)
1927     test = not isinstance(missing, list)
1928     _ErrorIf(test, constants.CV_ENODENET, node,
1929              "did not return valid bridge information")
1930     if not test:
1931       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
1932                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1933
1934   def _VerifyNodeNetwork(self, ninfo, nresult):
1935     """Check the node network connectivity results.
1936
1937     @type ninfo: L{objects.Node}
1938     @param ninfo: the node to check
1939     @param nresult: the remote results for the node
1940
1941     """
1942     node = ninfo.name
1943     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1944
1945     test = constants.NV_NODELIST not in nresult
1946     _ErrorIf(test, constants.CV_ENODESSH, node,
1947              "node hasn't returned node ssh connectivity data")
1948     if not test:
1949       if nresult[constants.NV_NODELIST]:
1950         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1951           _ErrorIf(True, constants.CV_ENODESSH, node,
1952                    "ssh communication with node '%s': %s", a_node, a_msg)
1953
1954     test = constants.NV_NODENETTEST not in nresult
1955     _ErrorIf(test, constants.CV_ENODENET, node,
1956              "node hasn't returned node tcp connectivity data")
1957     if not test:
1958       if nresult[constants.NV_NODENETTEST]:
1959         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1960         for anode in nlist:
1961           _ErrorIf(True, constants.CV_ENODENET, node,
1962                    "tcp communication with node '%s': %s",
1963                    anode, nresult[constants.NV_NODENETTEST][anode])
1964
1965     test = constants.NV_MASTERIP not in nresult
1966     _ErrorIf(test, constants.CV_ENODENET, node,
1967              "node hasn't returned node master IP reachability data")
1968     if not test:
1969       if not nresult[constants.NV_MASTERIP]:
1970         if node == self.master_node:
1971           msg = "the master node cannot reach the master IP (not configured?)"
1972         else:
1973           msg = "cannot reach the master IP"
1974         _ErrorIf(True, constants.CV_ENODENET, node, msg)
1975
1976   def _VerifyInstance(self, instance, instanceconfig, node_image,
1977                       diskstatus):
1978     """Verify an instance.
1979
1980     This function checks to see if the required block devices are
1981     available on the instance's node.
1982
1983     """
1984     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1985     node_current = instanceconfig.primary_node
1986
1987     node_vol_should = {}
1988     instanceconfig.MapLVsByNode(node_vol_should)
1989
1990     for node in node_vol_should:
1991       n_img = node_image[node]
1992       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1993         # ignore missing volumes on offline or broken nodes
1994         continue
1995       for volume in node_vol_should[node]:
1996         test = volume not in n_img.volumes
1997         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
1998                  "volume %s missing on node %s", volume, node)
1999
2000     if instanceconfig.admin_up:
2001       pri_img = node_image[node_current]
2002       test = instance not in pri_img.instances and not pri_img.offline
2003       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2004                "instance not running on its primary node %s",
2005                node_current)
2006
2007     diskdata = [(nname, success, status, idx)
2008                 for (nname, disks) in diskstatus.items()
2009                 for idx, (success, status) in enumerate(disks)]
2010
2011     for nname, success, bdev_status, idx in diskdata:
2012       # the 'ghost node' construction in Exec() ensures that we have a
2013       # node here
2014       snode = node_image[nname]
2015       bad_snode = snode.ghost or snode.offline
2016       _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2017                constants.CV_EINSTANCEFAULTYDISK, instance,
2018                "couldn't retrieve status for disk/%s on %s: %s",
2019                idx, nname, bdev_status)
2020       _ErrorIf((instanceconfig.admin_up and success and
2021                 bdev_status.ldisk_status == constants.LDS_FAULTY),
2022                constants.CV_EINSTANCEFAULTYDISK, instance,
2023                "disk/%s on %s is faulty", idx, nname)
2024
2025   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2026     """Verify if there are any unknown volumes in the cluster.
2027
2028     The .os, .swap and backup volumes are ignored. All other volumes are
2029     reported as unknown.
2030
2031     @type reserved: L{ganeti.utils.FieldSet}
2032     @param reserved: a FieldSet of reserved volume names
2033
2034     """
2035     for node, n_img in node_image.items():
2036       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2037         # skip non-healthy nodes
2038         continue
2039       for volume in n_img.volumes:
2040         test = ((node not in node_vol_should or
2041                 volume not in node_vol_should[node]) and
2042                 not reserved.Matches(volume))
2043         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2044                       "volume %s is unknown", volume)
2045
2046   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2047     """Verify N+1 Memory Resilience.
2048
2049     Check that if one single node dies we can still start all the
2050     instances it was primary for.
2051
2052     """
2053     cluster_info = self.cfg.GetClusterInfo()
2054     for node, n_img in node_image.items():
2055       # This code checks that every node which is now listed as
2056       # secondary has enough memory to host all instances it is
2057       # supposed to should a single other node in the cluster fail.
2058       # FIXME: not ready for failover to an arbitrary node
2059       # FIXME: does not support file-backed instances
2060       # WARNING: we currently take into account down instances as well
2061       # as up ones, considering that even if they're down someone
2062       # might want to start them even in the event of a node failure.
2063       if n_img.offline:
2064         # we're skipping offline nodes from the N+1 warning, since
2065         # most likely we don't have good memory infromation from them;
2066         # we already list instances living on such nodes, and that's
2067         # enough warning
2068         continue
2069       for prinode, instances in n_img.sbp.items():
2070         needed_mem = 0
2071         for instance in instances:
2072           bep = cluster_info.FillBE(instance_cfg[instance])
2073           if bep[constants.BE_AUTO_BALANCE]:
2074             needed_mem += bep[constants.BE_MEMORY]
2075         test = n_img.mfree < needed_mem
2076         self._ErrorIf(test, constants.CV_ENODEN1, node,
2077                       "not enough memory to accomodate instance failovers"
2078                       " should node %s fail (%dMiB needed, %dMiB available)",
2079                       prinode, needed_mem, n_img.mfree)
2080
2081   @classmethod
2082   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2083                    (files_all, files_all_opt, files_mc, files_vm)):
2084     """Verifies file checksums collected from all nodes.
2085
2086     @param errorif: Callback for reporting errors
2087     @param nodeinfo: List of L{objects.Node} objects
2088     @param master_node: Name of master node
2089     @param all_nvinfo: RPC results
2090
2091     """
2092     assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2093             sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2094            "Found file listed in more than one file list"
2095
2096     # Define functions determining which nodes to consider for a file
2097     files2nodefn = [
2098       (files_all, None),
2099       (files_all_opt, None),
2100       (files_mc, lambda node: (node.master_candidate or
2101                                node.name == master_node)),
2102       (files_vm, lambda node: node.vm_capable),
2103       ]
2104
2105     # Build mapping from filename to list of nodes which should have the file
2106     nodefiles = {}
2107     for (files, fn) in files2nodefn:
2108       if fn is None:
2109         filenodes = nodeinfo
2110       else:
2111         filenodes = filter(fn, nodeinfo)
2112       nodefiles.update((filename,
2113                         frozenset(map(operator.attrgetter("name"), filenodes)))
2114                        for filename in files)
2115
2116     assert set(nodefiles) == (files_all | files_all_opt | files_mc | files_vm)
2117
2118     fileinfo = dict((filename, {}) for filename in nodefiles)
2119     ignore_nodes = set()
2120
2121     for node in nodeinfo:
2122       if node.offline:
2123         ignore_nodes.add(node.name)
2124         continue
2125
2126       nresult = all_nvinfo[node.name]
2127
2128       if nresult.fail_msg or not nresult.payload:
2129         node_files = None
2130       else:
2131         node_files = nresult.payload.get(constants.NV_FILELIST, None)
2132
2133       test = not (node_files and isinstance(node_files, dict))
2134       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2135               "Node did not return file checksum data")
2136       if test:
2137         ignore_nodes.add(node.name)
2138         continue
2139
2140       # Build per-checksum mapping from filename to nodes having it
2141       for (filename, checksum) in node_files.items():
2142         assert filename in nodefiles
2143         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2144
2145     for (filename, checksums) in fileinfo.items():
2146       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2147
2148       # Nodes having the file
2149       with_file = frozenset(node_name
2150                             for nodes in fileinfo[filename].values()
2151                             for node_name in nodes) - ignore_nodes
2152
2153       expected_nodes = nodefiles[filename] - ignore_nodes
2154
2155       # Nodes missing file
2156       missing_file = expected_nodes - with_file
2157
2158       if filename in files_all_opt:
2159         # All or no nodes
2160         errorif(missing_file and missing_file != expected_nodes,
2161                 constants.CV_ECLUSTERFILECHECK, None,
2162                 "File %s is optional, but it must exist on all or no"
2163                 " nodes (not found on %s)",
2164                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2165       else:
2166         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2167                 "File %s is missing from node(s) %s", filename,
2168                 utils.CommaJoin(utils.NiceSort(missing_file)))
2169
2170         # Warn if a node has a file it shouldn't
2171         unexpected = with_file - expected_nodes
2172         errorif(unexpected,
2173                 constants.CV_ECLUSTERFILECHECK, None,
2174                 "File %s should not exist on node(s) %s",
2175                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2176
2177       # See if there are multiple versions of the file
2178       test = len(checksums) > 1
2179       if test:
2180         variants = ["variant %s on %s" %
2181                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2182                     for (idx, (checksum, nodes)) in
2183                       enumerate(sorted(checksums.items()))]
2184       else:
2185         variants = []
2186
2187       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2188               "File %s found with %s different checksums (%s)",
2189               filename, len(checksums), "; ".join(variants))
2190
2191   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2192                       drbd_map):
2193     """Verifies and the node DRBD status.
2194
2195     @type ninfo: L{objects.Node}
2196     @param ninfo: the node to check
2197     @param nresult: the remote results for the node
2198     @param instanceinfo: the dict of instances
2199     @param drbd_helper: the configured DRBD usermode helper
2200     @param drbd_map: the DRBD map as returned by
2201         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2202
2203     """
2204     node = ninfo.name
2205     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2206
2207     if drbd_helper:
2208       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2209       test = (helper_result == None)
2210       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2211                "no drbd usermode helper returned")
2212       if helper_result:
2213         status, payload = helper_result
2214         test = not status
2215         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2216                  "drbd usermode helper check unsuccessful: %s", payload)
2217         test = status and (payload != drbd_helper)
2218         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2219                  "wrong drbd usermode helper: %s", payload)
2220
2221     # compute the DRBD minors
2222     node_drbd = {}
2223     for minor, instance in drbd_map[node].items():
2224       test = instance not in instanceinfo
2225       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2226                "ghost instance '%s' in temporary DRBD map", instance)
2227         # ghost instance should not be running, but otherwise we
2228         # don't give double warnings (both ghost instance and
2229         # unallocated minor in use)
2230       if test:
2231         node_drbd[minor] = (instance, False)
2232       else:
2233         instance = instanceinfo[instance]
2234         node_drbd[minor] = (instance.name, instance.admin_up)
2235
2236     # and now check them
2237     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2238     test = not isinstance(used_minors, (tuple, list))
2239     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2240              "cannot parse drbd status file: %s", str(used_minors))
2241     if test:
2242       # we cannot check drbd status
2243       return
2244
2245     for minor, (iname, must_exist) in node_drbd.items():
2246       test = minor not in used_minors and must_exist
2247       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2248                "drbd minor %d of instance %s is not active", minor, iname)
2249     for minor in used_minors:
2250       test = minor not in node_drbd
2251       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2252                "unallocated drbd minor %d is in use", minor)
2253
2254   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2255     """Builds the node OS structures.
2256
2257     @type ninfo: L{objects.Node}
2258     @param ninfo: the node to check
2259     @param nresult: the remote results for the node
2260     @param nimg: the node image object
2261
2262     """
2263     node = ninfo.name
2264     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2265
2266     remote_os = nresult.get(constants.NV_OSLIST, None)
2267     test = (not isinstance(remote_os, list) or
2268             not compat.all(isinstance(v, list) and len(v) == 7
2269                            for v in remote_os))
2270
2271     _ErrorIf(test, constants.CV_ENODEOS, node,
2272              "node hasn't returned valid OS data")
2273
2274     nimg.os_fail = test
2275
2276     if test:
2277       return
2278
2279     os_dict = {}
2280
2281     for (name, os_path, status, diagnose,
2282          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2283
2284       if name not in os_dict:
2285         os_dict[name] = []
2286
2287       # parameters is a list of lists instead of list of tuples due to
2288       # JSON lacking a real tuple type, fix it:
2289       parameters = [tuple(v) for v in parameters]
2290       os_dict[name].append((os_path, status, diagnose,
2291                             set(variants), set(parameters), set(api_ver)))
2292
2293     nimg.oslist = os_dict
2294
2295   def _VerifyNodeOS(self, ninfo, nimg, base):
2296     """Verifies the node OS list.
2297
2298     @type ninfo: L{objects.Node}
2299     @param ninfo: the node to check
2300     @param nimg: the node image object
2301     @param base: the 'template' node we match against (e.g. from the master)
2302
2303     """
2304     node = ninfo.name
2305     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2306
2307     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2308
2309     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2310     for os_name, os_data in nimg.oslist.items():
2311       assert os_data, "Empty OS status for OS %s?!" % os_name
2312       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2313       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2314                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2315       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2316                "OS '%s' has multiple entries (first one shadows the rest): %s",
2317                os_name, utils.CommaJoin([v[0] for v in os_data]))
2318       # comparisons with the 'base' image
2319       test = os_name not in base.oslist
2320       _ErrorIf(test, constants.CV_ENODEOS, node,
2321                "Extra OS %s not present on reference node (%s)",
2322                os_name, base.name)
2323       if test:
2324         continue
2325       assert base.oslist[os_name], "Base node has empty OS status?"
2326       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2327       if not b_status:
2328         # base OS is invalid, skipping
2329         continue
2330       for kind, a, b in [("API version", f_api, b_api),
2331                          ("variants list", f_var, b_var),
2332                          ("parameters", beautify_params(f_param),
2333                           beautify_params(b_param))]:
2334         _ErrorIf(a != b, constants.CV_ENODEOS, node,
2335                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2336                  kind, os_name, base.name,
2337                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2338
2339     # check any missing OSes
2340     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2341     _ErrorIf(missing, constants.CV_ENODEOS, node,
2342              "OSes present on reference node %s but missing on this node: %s",
2343              base.name, utils.CommaJoin(missing))
2344
2345   def _VerifyOob(self, ninfo, nresult):
2346     """Verifies out of band functionality of a node.
2347
2348     @type ninfo: L{objects.Node}
2349     @param ninfo: the node to check
2350     @param nresult: the remote results for the node
2351
2352     """
2353     node = ninfo.name
2354     # We just have to verify the paths on master and/or master candidates
2355     # as the oob helper is invoked on the master
2356     if ((ninfo.master_candidate or ninfo.master_capable) and
2357         constants.NV_OOB_PATHS in nresult):
2358       for path_result in nresult[constants.NV_OOB_PATHS]:
2359         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2360
2361   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2362     """Verifies and updates the node volume data.
2363
2364     This function will update a L{NodeImage}'s internal structures
2365     with data from the remote call.
2366
2367     @type ninfo: L{objects.Node}
2368     @param ninfo: the node to check
2369     @param nresult: the remote results for the node
2370     @param nimg: the node image object
2371     @param vg_name: the configured VG name
2372
2373     """
2374     node = ninfo.name
2375     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2376
2377     nimg.lvm_fail = True
2378     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2379     if vg_name is None:
2380       pass
2381     elif isinstance(lvdata, basestring):
2382       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2383                utils.SafeEncode(lvdata))
2384     elif not isinstance(lvdata, dict):
2385       _ErrorIf(True, constants.CV_ENODELVM, node,
2386                "rpc call to node failed (lvlist)")
2387     else:
2388       nimg.volumes = lvdata
2389       nimg.lvm_fail = False
2390
2391   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2392     """Verifies and updates the node instance list.
2393
2394     If the listing was successful, then updates this node's instance
2395     list. Otherwise, it marks the RPC call as failed for the instance
2396     list key.
2397
2398     @type ninfo: L{objects.Node}
2399     @param ninfo: the node to check
2400     @param nresult: the remote results for the node
2401     @param nimg: the node image object
2402
2403     """
2404     idata = nresult.get(constants.NV_INSTANCELIST, None)
2405     test = not isinstance(idata, list)
2406     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2407                   "rpc call to node failed (instancelist): %s",
2408                   utils.SafeEncode(str(idata)))
2409     if test:
2410       nimg.hyp_fail = True
2411     else:
2412       nimg.instances = idata
2413
2414   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2415     """Verifies and computes a node information map
2416
2417     @type ninfo: L{objects.Node}
2418     @param ninfo: the node to check
2419     @param nresult: the remote results for the node
2420     @param nimg: the node image object
2421     @param vg_name: the configured VG name
2422
2423     """
2424     node = ninfo.name
2425     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2426
2427     # try to read free memory (from the hypervisor)
2428     hv_info = nresult.get(constants.NV_HVINFO, None)
2429     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2430     _ErrorIf(test, constants.CV_ENODEHV, node,
2431              "rpc call to node failed (hvinfo)")
2432     if not test:
2433       try:
2434         nimg.mfree = int(hv_info["memory_free"])
2435       except (ValueError, TypeError):
2436         _ErrorIf(True, constants.CV_ENODERPC, node,
2437                  "node returned invalid nodeinfo, check hypervisor")
2438
2439     # FIXME: devise a free space model for file based instances as well
2440     if vg_name is not None:
2441       test = (constants.NV_VGLIST not in nresult or
2442               vg_name not in nresult[constants.NV_VGLIST])
2443       _ErrorIf(test, constants.CV_ENODELVM, node,
2444                "node didn't return data for the volume group '%s'"
2445                " - it is either missing or broken", vg_name)
2446       if not test:
2447         try:
2448           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2449         except (ValueError, TypeError):
2450           _ErrorIf(True, constants.CV_ENODERPC, node,
2451                    "node returned invalid LVM info, check LVM status")
2452
2453   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2454     """Gets per-disk status information for all instances.
2455
2456     @type nodelist: list of strings
2457     @param nodelist: Node names
2458     @type node_image: dict of (name, L{objects.Node})
2459     @param node_image: Node objects
2460     @type instanceinfo: dict of (name, L{objects.Instance})
2461     @param instanceinfo: Instance objects
2462     @rtype: {instance: {node: [(succes, payload)]}}
2463     @return: a dictionary of per-instance dictionaries with nodes as
2464         keys and disk information as values; the disk information is a
2465         list of tuples (success, payload)
2466
2467     """
2468     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2469
2470     node_disks = {}
2471     node_disks_devonly = {}
2472     diskless_instances = set()
2473     diskless = constants.DT_DISKLESS
2474
2475     for nname in nodelist:
2476       node_instances = list(itertools.chain(node_image[nname].pinst,
2477                                             node_image[nname].sinst))
2478       diskless_instances.update(inst for inst in node_instances
2479                                 if instanceinfo[inst].disk_template == diskless)
2480       disks = [(inst, disk)
2481                for inst in node_instances
2482                for disk in instanceinfo[inst].disks]
2483
2484       if not disks:
2485         # No need to collect data
2486         continue
2487
2488       node_disks[nname] = disks
2489
2490       # Creating copies as SetDiskID below will modify the objects and that can
2491       # lead to incorrect data returned from nodes
2492       devonly = [dev.Copy() for (_, dev) in disks]
2493
2494       for dev in devonly:
2495         self.cfg.SetDiskID(dev, nname)
2496
2497       node_disks_devonly[nname] = devonly
2498
2499     assert len(node_disks) == len(node_disks_devonly)
2500
2501     # Collect data from all nodes with disks
2502     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2503                                                           node_disks_devonly)
2504
2505     assert len(result) == len(node_disks)
2506
2507     instdisk = {}
2508
2509     for (nname, nres) in result.items():
2510       disks = node_disks[nname]
2511
2512       if nres.offline:
2513         # No data from this node
2514         data = len(disks) * [(False, "node offline")]
2515       else:
2516         msg = nres.fail_msg
2517         _ErrorIf(msg, constants.CV_ENODERPC, nname,
2518                  "while getting disk information: %s", msg)
2519         if msg:
2520           # No data from this node
2521           data = len(disks) * [(False, msg)]
2522         else:
2523           data = []
2524           for idx, i in enumerate(nres.payload):
2525             if isinstance(i, (tuple, list)) and len(i) == 2:
2526               data.append(i)
2527             else:
2528               logging.warning("Invalid result from node %s, entry %d: %s",
2529                               nname, idx, i)
2530               data.append((False, "Invalid result from the remote node"))
2531
2532       for ((inst, _), status) in zip(disks, data):
2533         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2534
2535     # Add empty entries for diskless instances.
2536     for inst in diskless_instances:
2537       assert inst not in instdisk
2538       instdisk[inst] = {}
2539
2540     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2541                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2542                       compat.all(isinstance(s, (tuple, list)) and
2543                                  len(s) == 2 for s in statuses)
2544                       for inst, nnames in instdisk.items()
2545                       for nname, statuses in nnames.items())
2546     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2547
2548     return instdisk
2549
2550   @staticmethod
2551   def _SshNodeSelector(group_uuid, all_nodes):
2552     """Create endless iterators for all potential SSH check hosts.
2553
2554     """
2555     nodes = [node for node in all_nodes
2556              if (node.group != group_uuid and
2557                  not node.offline)]
2558     keyfunc = operator.attrgetter("group")
2559
2560     return map(itertools.cycle,
2561                [sorted(map(operator.attrgetter("name"), names))
2562                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2563                                                   keyfunc)])
2564
2565   @classmethod
2566   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2567     """Choose which nodes should talk to which other nodes.
2568
2569     We will make nodes contact all nodes in their group, and one node from
2570     every other group.
2571
2572     @warning: This algorithm has a known issue if one node group is much
2573       smaller than others (e.g. just one node). In such a case all other
2574       nodes will talk to the single node.
2575
2576     """
2577     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2578     sel = cls._SshNodeSelector(group_uuid, all_nodes)
2579
2580     return (online_nodes,
2581             dict((name, sorted([i.next() for i in sel]))
2582                  for name in online_nodes))
2583
2584   def BuildHooksEnv(self):
2585     """Build hooks env.
2586
2587     Cluster-Verify hooks just ran in the post phase and their failure makes
2588     the output be logged in the verify output and the verification to fail.
2589
2590     """
2591     env = {
2592       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2593       }
2594
2595     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2596                for node in self.my_node_info.values())
2597
2598     return env
2599
2600   def BuildHooksNodes(self):
2601     """Build hooks nodes.
2602
2603     """
2604     return ([], self.my_node_names)
2605
2606   def Exec(self, feedback_fn):
2607     """Verify integrity of the node group, performing various test on nodes.
2608
2609     """
2610     # This method has too many local variables. pylint: disable=R0914
2611     feedback_fn("* Verifying group '%s'" % self.group_info.name)
2612
2613     if not self.my_node_names:
2614       # empty node group
2615       feedback_fn("* Empty node group, skipping verification")
2616       return True
2617
2618     self.bad = False
2619     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2620     verbose = self.op.verbose
2621     self._feedback_fn = feedback_fn
2622
2623     vg_name = self.cfg.GetVGName()
2624     drbd_helper = self.cfg.GetDRBDHelper()
2625     cluster = self.cfg.GetClusterInfo()
2626     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2627     hypervisors = cluster.enabled_hypervisors
2628     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2629
2630     i_non_redundant = [] # Non redundant instances
2631     i_non_a_balanced = [] # Non auto-balanced instances
2632     n_offline = 0 # Count of offline nodes
2633     n_drained = 0 # Count of nodes being drained
2634     node_vol_should = {}
2635
2636     # FIXME: verify OS list
2637
2638     # File verification
2639     filemap = _ComputeAncillaryFiles(cluster, False)
2640
2641     # do local checksums
2642     master_node = self.master_node = self.cfg.GetMasterNode()
2643     master_ip = self.cfg.GetMasterIP()
2644
2645     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2646
2647     node_verify_param = {
2648       constants.NV_FILELIST:
2649         utils.UniqueSequence(filename
2650                              for files in filemap
2651                              for filename in files),
2652       constants.NV_NODELIST:
2653         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2654                                   self.all_node_info.values()),
2655       constants.NV_HYPERVISOR: hypervisors,
2656       constants.NV_HVPARAMS:
2657         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2658       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2659                                  for node in node_data_list
2660                                  if not node.offline],
2661       constants.NV_INSTANCELIST: hypervisors,
2662       constants.NV_VERSION: None,
2663       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2664       constants.NV_NODESETUP: None,
2665       constants.NV_TIME: None,
2666       constants.NV_MASTERIP: (master_node, master_ip),
2667       constants.NV_OSLIST: None,
2668       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2669       }
2670
2671     if vg_name is not None:
2672       node_verify_param[constants.NV_VGLIST] = None
2673       node_verify_param[constants.NV_LVLIST] = vg_name
2674       node_verify_param[constants.NV_PVLIST] = [vg_name]
2675       node_verify_param[constants.NV_DRBDLIST] = None
2676
2677     if drbd_helper:
2678       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2679
2680     # bridge checks
2681     # FIXME: this needs to be changed per node-group, not cluster-wide
2682     bridges = set()
2683     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2684     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2685       bridges.add(default_nicpp[constants.NIC_LINK])
2686     for instance in self.my_inst_info.values():
2687       for nic in instance.nics:
2688         full_nic = cluster.SimpleFillNIC(nic.nicparams)
2689         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2690           bridges.add(full_nic[constants.NIC_LINK])
2691
2692     if bridges:
2693       node_verify_param[constants.NV_BRIDGES] = list(bridges)
2694
2695     # Build our expected cluster state
2696     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2697                                                  name=node.name,
2698                                                  vm_capable=node.vm_capable))
2699                       for node in node_data_list)
2700
2701     # Gather OOB paths
2702     oob_paths = []
2703     for node in self.all_node_info.values():
2704       path = _SupportsOob(self.cfg, node)
2705       if path and path not in oob_paths:
2706         oob_paths.append(path)
2707
2708     if oob_paths:
2709       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2710
2711     for instance in self.my_inst_names:
2712       inst_config = self.my_inst_info[instance]
2713
2714       for nname in inst_config.all_nodes:
2715         if nname not in node_image:
2716           gnode = self.NodeImage(name=nname)
2717           gnode.ghost = (nname not in self.all_node_info)
2718           node_image[nname] = gnode
2719
2720       inst_config.MapLVsByNode(node_vol_should)
2721
2722       pnode = inst_config.primary_node
2723       node_image[pnode].pinst.append(instance)
2724
2725       for snode in inst_config.secondary_nodes:
2726         nimg = node_image[snode]
2727         nimg.sinst.append(instance)
2728         if pnode not in nimg.sbp:
2729           nimg.sbp[pnode] = []
2730         nimg.sbp[pnode].append(instance)
2731
2732     # At this point, we have the in-memory data structures complete,
2733     # except for the runtime information, which we'll gather next
2734
2735     # Due to the way our RPC system works, exact response times cannot be
2736     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2737     # time before and after executing the request, we can at least have a time
2738     # window.
2739     nvinfo_starttime = time.time()
2740     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2741                                            node_verify_param,
2742                                            self.cfg.GetClusterName())
2743     nvinfo_endtime = time.time()
2744
2745     if self.extra_lv_nodes and vg_name is not None:
2746       extra_lv_nvinfo = \
2747           self.rpc.call_node_verify(self.extra_lv_nodes,
2748                                     {constants.NV_LVLIST: vg_name},
2749                                     self.cfg.GetClusterName())
2750     else:
2751       extra_lv_nvinfo = {}
2752
2753     all_drbd_map = self.cfg.ComputeDRBDMap()
2754
2755     feedback_fn("* Gathering disk information (%s nodes)" %
2756                 len(self.my_node_names))
2757     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2758                                      self.my_inst_info)
2759
2760     feedback_fn("* Verifying configuration file consistency")
2761
2762     # If not all nodes are being checked, we need to make sure the master node
2763     # and a non-checked vm_capable node are in the list.
2764     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2765     if absent_nodes:
2766       vf_nvinfo = all_nvinfo.copy()
2767       vf_node_info = list(self.my_node_info.values())
2768       additional_nodes = []
2769       if master_node not in self.my_node_info:
2770         additional_nodes.append(master_node)
2771         vf_node_info.append(self.all_node_info[master_node])
2772       # Add the first vm_capable node we find which is not included
2773       for node in absent_nodes:
2774         nodeinfo = self.all_node_info[node]
2775         if nodeinfo.vm_capable and not nodeinfo.offline:
2776           additional_nodes.append(node)
2777           vf_node_info.append(self.all_node_info[node])
2778           break
2779       key = constants.NV_FILELIST
2780       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2781                                                  {key: node_verify_param[key]},
2782                                                  self.cfg.GetClusterName()))
2783     else:
2784       vf_nvinfo = all_nvinfo
2785       vf_node_info = self.my_node_info.values()
2786
2787     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2788
2789     feedback_fn("* Verifying node status")
2790
2791     refos_img = None
2792
2793     for node_i in node_data_list:
2794       node = node_i.name
2795       nimg = node_image[node]
2796
2797       if node_i.offline:
2798         if verbose:
2799           feedback_fn("* Skipping offline node %s" % (node,))
2800         n_offline += 1
2801         continue
2802
2803       if node == master_node:
2804         ntype = "master"
2805       elif node_i.master_candidate:
2806         ntype = "master candidate"
2807       elif node_i.drained:
2808         ntype = "drained"
2809         n_drained += 1
2810       else:
2811         ntype = "regular"
2812       if verbose:
2813         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2814
2815       msg = all_nvinfo[node].fail_msg
2816       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2817                msg)
2818       if msg:
2819         nimg.rpc_fail = True
2820         continue
2821
2822       nresult = all_nvinfo[node].payload
2823
2824       nimg.call_ok = self._VerifyNode(node_i, nresult)
2825       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2826       self._VerifyNodeNetwork(node_i, nresult)
2827       self._VerifyOob(node_i, nresult)
2828
2829       if nimg.vm_capable:
2830         self._VerifyNodeLVM(node_i, nresult, vg_name)
2831         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2832                              all_drbd_map)
2833
2834         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2835         self._UpdateNodeInstances(node_i, nresult, nimg)
2836         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2837         self._UpdateNodeOS(node_i, nresult, nimg)
2838
2839         if not nimg.os_fail:
2840           if refos_img is None:
2841             refos_img = nimg
2842           self._VerifyNodeOS(node_i, nimg, refos_img)
2843         self._VerifyNodeBridges(node_i, nresult, bridges)
2844
2845         # Check whether all running instancies are primary for the node. (This
2846         # can no longer be done from _VerifyInstance below, since some of the
2847         # wrong instances could be from other node groups.)
2848         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2849
2850         for inst in non_primary_inst:
2851           test = inst in self.all_inst_info
2852           _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2853                    "instance should not run on node %s", node_i.name)
2854           _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2855                    "node is running unknown instance %s", inst)
2856
2857     for node, result in extra_lv_nvinfo.items():
2858       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2859                               node_image[node], vg_name)
2860
2861     feedback_fn("* Verifying instance status")
2862     for instance in self.my_inst_names:
2863       if verbose:
2864         feedback_fn("* Verifying instance %s" % instance)
2865       inst_config = self.my_inst_info[instance]
2866       self._VerifyInstance(instance, inst_config, node_image,
2867                            instdisk[instance])
2868       inst_nodes_offline = []
2869
2870       pnode = inst_config.primary_node
2871       pnode_img = node_image[pnode]
2872       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2873                constants.CV_ENODERPC, pnode, "instance %s, connection to"
2874                " primary node failed", instance)
2875
2876       _ErrorIf(inst_config.admin_up and pnode_img.offline,
2877                constants.CV_EINSTANCEBADNODE, instance,
2878                "instance is marked as running and lives on offline node %s",
2879                inst_config.primary_node)
2880
2881       # If the instance is non-redundant we cannot survive losing its primary
2882       # node, so we are not N+1 compliant. On the other hand we have no disk
2883       # templates with more than one secondary so that situation is not well
2884       # supported either.
2885       # FIXME: does not support file-backed instances
2886       if not inst_config.secondary_nodes:
2887         i_non_redundant.append(instance)
2888
2889       _ErrorIf(len(inst_config.secondary_nodes) > 1,
2890                constants.CV_EINSTANCELAYOUT,
2891                instance, "instance has multiple secondary nodes: %s",
2892                utils.CommaJoin(inst_config.secondary_nodes),
2893                code=self.ETYPE_WARNING)
2894
2895       if inst_config.disk_template in constants.DTS_INT_MIRROR:
2896         pnode = inst_config.primary_node
2897         instance_nodes = utils.NiceSort(inst_config.all_nodes)
2898         instance_groups = {}
2899
2900         for node in instance_nodes:
2901           instance_groups.setdefault(self.all_node_info[node].group,
2902                                      []).append(node)
2903
2904         pretty_list = [
2905           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2906           # Sort so that we always list the primary node first.
2907           for group, nodes in sorted(instance_groups.items(),
2908                                      key=lambda (_, nodes): pnode in nodes,
2909                                      reverse=True)]
2910
2911         self._ErrorIf(len(instance_groups) > 1,
2912                       constants.CV_EINSTANCESPLITGROUPS,
2913                       instance, "instance has primary and secondary nodes in"
2914                       " different groups: %s", utils.CommaJoin(pretty_list),
2915                       code=self.ETYPE_WARNING)
2916
2917       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2918         i_non_a_balanced.append(instance)
2919
2920       for snode in inst_config.secondary_nodes:
2921         s_img = node_image[snode]
2922         _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2923                  snode, "instance %s, connection to secondary node failed",
2924                  instance)
2925
2926         if s_img.offline:
2927           inst_nodes_offline.append(snode)
2928
2929       # warn that the instance lives on offline nodes
2930       _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2931                "instance has offline secondary node(s) %s",
2932                utils.CommaJoin(inst_nodes_offline))
2933       # ... or ghost/non-vm_capable nodes
2934       for node in inst_config.all_nodes:
2935         _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2936                  instance, "instance lives on ghost node %s", node)
2937         _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2938                  instance, "instance lives on non-vm_capable node %s", node)
2939
2940     feedback_fn("* Verifying orphan volumes")
2941     reserved = utils.FieldSet(*cluster.reserved_lvs)
2942
2943     # We will get spurious "unknown volume" warnings if any node of this group
2944     # is secondary for an instance whose primary is in another group. To avoid
2945     # them, we find these instances and add their volumes to node_vol_should.
2946     for inst in self.all_inst_info.values():
2947       for secondary in inst.secondary_nodes:
2948         if (secondary in self.my_node_info
2949             and inst.name not in self.my_inst_info):
2950           inst.MapLVsByNode(node_vol_should)
2951           break
2952
2953     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2954
2955     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2956       feedback_fn("* Verifying N+1 Memory redundancy")
2957       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2958
2959     feedback_fn("* Other Notes")
2960     if i_non_redundant:
2961       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2962                   % len(i_non_redundant))
2963
2964     if i_non_a_balanced:
2965       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2966                   % len(i_non_a_balanced))
2967
2968     if n_offline:
2969       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2970
2971     if n_drained:
2972       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2973
2974     return not self.bad
2975
2976   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2977     """Analyze the post-hooks' result
2978
2979     This method analyses the hook result, handles it, and sends some
2980     nicely-formatted feedback back to the user.
2981
2982     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2983         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2984     @param hooks_results: the results of the multi-node hooks rpc call
2985     @param feedback_fn: function used send feedback back to the caller
2986     @param lu_result: previous Exec result
2987     @return: the new Exec result, based on the previous result
2988         and hook results
2989
2990     """
2991     # We only really run POST phase hooks, only for non-empty groups,
2992     # and are only interested in their results
2993     if not self.my_node_names:
2994       # empty node group
2995       pass
2996     elif phase == constants.HOOKS_PHASE_POST:
2997       # Used to change hooks' output to proper indentation
2998       feedback_fn("* Hooks Results")
2999       assert hooks_results, "invalid result from hooks"
3000
3001       for node_name in hooks_results:
3002         res = hooks_results[node_name]
3003         msg = res.fail_msg
3004         test = msg and not res.offline
3005         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3006                       "Communication failure in hooks execution: %s", msg)
3007         if res.offline or msg:
3008           # No need to investigate payload if node is offline or gave
3009           # an error.
3010           continue
3011         for script, hkr, output in res.payload:
3012           test = hkr == constants.HKR_FAIL
3013           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3014                         "Script %s failed, output:", script)
3015           if test:
3016             output = self._HOOKS_INDENT_RE.sub("      ", output)
3017             feedback_fn("%s" % output)
3018             lu_result = False
3019
3020     return lu_result
3021
3022
3023 class LUClusterVerifyDisks(NoHooksLU):
3024   """Verifies the cluster disks status.
3025
3026   """
3027   REQ_BGL = False
3028
3029   def ExpandNames(self):
3030     self.share_locks = _ShareAll()
3031     self.needed_locks = {
3032       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3033       }
3034
3035   def Exec(self, feedback_fn):
3036     group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3037
3038     # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3039     return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3040                            for group in group_names])
3041
3042
3043 class LUGroupVerifyDisks(NoHooksLU):
3044   """Verifies the status of all disks in a node group.
3045
3046   """
3047   REQ_BGL = False
3048
3049   def ExpandNames(self):
3050     # Raises errors.OpPrereqError on its own if group can't be found
3051     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3052
3053     self.share_locks = _ShareAll()
3054     self.needed_locks = {
3055       locking.LEVEL_INSTANCE: [],
3056       locking.LEVEL_NODEGROUP: [],
3057       locking.LEVEL_NODE: [],
3058       }
3059
3060   def DeclareLocks(self, level):
3061     if level == locking.LEVEL_INSTANCE:
3062       assert not self.needed_locks[locking.LEVEL_INSTANCE]
3063
3064       # Lock instances optimistically, needs verification once node and group
3065       # locks have been acquired
3066       self.needed_locks[locking.LEVEL_INSTANCE] = \
3067         self.cfg.GetNodeGroupInstances(self.group_uuid)
3068
3069     elif level == locking.LEVEL_NODEGROUP:
3070       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3071
3072       self.needed_locks[locking.LEVEL_NODEGROUP] = \
3073         set([self.group_uuid] +
3074             # Lock all groups used by instances optimistically; this requires
3075             # going via the node before it's locked, requiring verification
3076             # later on
3077             [group_uuid
3078              for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3079              for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3080
3081     elif level == locking.LEVEL_NODE:
3082       # This will only lock the nodes in the group to be verified which contain
3083       # actual instances
3084       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3085       self._LockInstancesNodes()
3086
3087       # Lock all nodes in group to be verified
3088       assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3089       member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3090       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3091
3092   def CheckPrereq(self):
3093     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3094     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3095     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3096
3097     assert self.group_uuid in owned_groups
3098
3099     # Check if locked instances are still correct
3100     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3101
3102     # Get instance information
3103     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3104
3105     # Check if node groups for locked instances are still correct
3106     for (instance_name, inst) in self.instances.items():
3107       assert owned_nodes.issuperset(inst.all_nodes), \
3108         "Instance %s's nodes changed while we kept the lock" % instance_name
3109
3110       inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3111                                              owned_groups)
3112
3113       assert self.group_uuid in inst_groups, \
3114         "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3115
3116   def Exec(self, feedback_fn):
3117     """Verify integrity of cluster disks.
3118
3119     @rtype: tuple of three items
3120     @return: a tuple of (dict of node-to-node_error, list of instances
3121         which need activate-disks, dict of instance: (node, volume) for
3122         missing volumes
3123
3124     """
3125     res_nodes = {}
3126     res_instances = set()
3127     res_missing = {}
3128
3129     nv_dict = _MapInstanceDisksToNodes([inst
3130                                         for inst in self.instances.values()
3131                                         if inst.admin_up])
3132
3133     if nv_dict:
3134       nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3135                              set(self.cfg.GetVmCapableNodeList()))
3136
3137       node_lvs = self.rpc.call_lv_list(nodes, [])
3138
3139       for (node, node_res) in node_lvs.items():
3140         if node_res.offline:
3141           continue
3142
3143         msg = node_res.fail_msg
3144         if msg:
3145           logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3146           res_nodes[node] = msg
3147           continue
3148
3149         for lv_name, (_, _, lv_online) in node_res.payload.items():
3150           inst = nv_dict.pop((node, lv_name), None)
3151           if not (lv_online or inst is None):
3152             res_instances.add(inst)
3153
3154       # any leftover items in nv_dict are missing LVs, let's arrange the data
3155       # better
3156       for key, inst in nv_dict.iteritems():
3157         res_missing.setdefault(inst, []).append(key)
3158
3159     return (res_nodes, list(res_instances), res_missing)
3160
3161
3162 class LUClusterRepairDiskSizes(NoHooksLU):
3163   """Verifies the cluster disks sizes.
3164
3165   """
3166   REQ_BGL = False
3167
3168   def ExpandNames(self):
3169     if self.op.instances:
3170       self.wanted_names = _GetWantedInstances(self, self.op.instances)
3171       self.needed_locks = {
3172         locking.LEVEL_NODE: [],
3173         locking.LEVEL_INSTANCE: self.wanted_names,
3174         }
3175       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3176     else:
3177       self.wanted_names = None
3178       self.needed_locks = {
3179         locking.LEVEL_NODE: locking.ALL_SET,
3180         locking.LEVEL_INSTANCE: locking.ALL_SET,
3181         }
3182     self.share_locks = _ShareAll()
3183
3184   def DeclareLocks(self, level):
3185     if level == locking.LEVEL_NODE and self.wanted_names is not None:
3186       self._LockInstancesNodes(primary_only=True)
3187
3188   def CheckPrereq(self):
3189     """Check prerequisites.
3190
3191     This only checks the optional instance list against the existing names.
3192
3193     """
3194     if self.wanted_names is None:
3195       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3196
3197     self.wanted_instances = \
3198         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3199
3200   def _EnsureChildSizes(self, disk):
3201     """Ensure children of the disk have the needed disk size.
3202
3203     This is valid mainly for DRBD8 and fixes an issue where the
3204     children have smaller disk size.
3205
3206     @param disk: an L{ganeti.objects.Disk} object
3207
3208     """
3209     if disk.dev_type == constants.LD_DRBD8:
3210       assert disk.children, "Empty children for DRBD8?"
3211       fchild = disk.children[0]
3212       mismatch = fchild.size < disk.size
3213       if mismatch:
3214         self.LogInfo("Child disk has size %d, parent %d, fixing",
3215                      fchild.size, disk.size)
3216         fchild.size = disk.size
3217
3218       # and we recurse on this child only, not on the metadev
3219       return self._EnsureChildSizes(fchild) or mismatch
3220     else:
3221       return False
3222
3223   def Exec(self, feedback_fn):
3224     """Verify the size of cluster disks.
3225
3226     """
3227     # TODO: check child disks too
3228     # TODO: check differences in size between primary/secondary nodes
3229     per_node_disks = {}
3230     for instance in self.wanted_instances:
3231       pnode = instance.primary_node
3232       if pnode not in per_node_disks:
3233         per_node_disks[pnode] = []
3234       for idx, disk in enumerate(instance.disks):
3235         per_node_disks[pnode].append((instance, idx, disk))
3236
3237     changed = []
3238     for node, dskl in per_node_disks.items():
3239       newl = [v[2].Copy() for v in dskl]
3240       for dsk in newl:
3241         self.cfg.SetDiskID(dsk, node)
3242       result = self.rpc.call_blockdev_getsize(node, newl)
3243       if result.fail_msg:
3244         self.LogWarning("Failure in blockdev_getsize call to node"
3245                         " %s, ignoring", node)
3246         continue
3247       if len(result.payload) != len(dskl):
3248         logging.warning("Invalid result from node %s: len(dksl)=%d,"
3249                         " result.payload=%s", node, len(dskl), result.payload)
3250         self.LogWarning("Invalid result from node %s, ignoring node results",
3251                         node)
3252         continue
3253       for ((instance, idx, disk), size) in zip(dskl, result.payload):
3254         if size is None:
3255           self.LogWarning("Disk %d of instance %s did not return size"
3256                           " information, ignoring", idx, instance.name)
3257           continue
3258         if not isinstance(size, (int, long)):
3259           self.LogWarning("Disk %d of instance %s did not return valid"
3260                           " size information, ignoring", idx, instance.name)
3261           continue
3262         size = size >> 20
3263         if size != disk.size:
3264           self.LogInfo("Disk %d of instance %s has mismatched size,"
3265                        " correcting: recorded %d, actual %d", idx,
3266                        instance.name, disk.size, size)
3267           disk.size = size
3268           self.cfg.Update(instance, feedback_fn)
3269           changed.append((instance.name, idx, size))
3270         if self._EnsureChildSizes(disk):
3271           self.cfg.Update(instance, feedback_fn)
3272           changed.append((instance.name, idx, disk.size))
3273     return changed
3274
3275
3276 class LUClusterRename(LogicalUnit):
3277   """Rename the cluster.
3278
3279   """
3280   HPATH = "cluster-rename"
3281   HTYPE = constants.HTYPE_CLUSTER
3282
3283   def BuildHooksEnv(self):
3284     """Build hooks env.
3285
3286     """
3287     return {
3288       "OP_TARGET": self.cfg.GetClusterName(),
3289       "NEW_NAME": self.op.name,
3290       }
3291
3292   def BuildHooksNodes(self):
3293     """Build hooks nodes.
3294
3295     """
3296     return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3297
3298   def CheckPrereq(self):
3299     """Verify that the passed name is a valid one.
3300
3301     """
3302     hostname = netutils.GetHostname(name=self.op.name,
3303                                     family=self.cfg.GetPrimaryIPFamily())
3304
3305     new_name = hostname.name
3306     self.ip = new_ip = hostname.ip
3307     old_name = self.cfg.GetClusterName()
3308     old_ip = self.cfg.GetMasterIP()
3309     if new_name == old_name and new_ip == old_ip:
3310       raise errors.OpPrereqError("Neither the name nor the IP address of the"
3311                                  " cluster has changed",
3312                                  errors.ECODE_INVAL)
3313     if new_ip != old_ip:
3314       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3315         raise errors.OpPrereqError("The given cluster IP address (%s) is"
3316                                    " reachable on the network" %
3317                                    new_ip, errors.ECODE_NOTUNIQUE)
3318
3319     self.op.name = new_name
3320
3321   def Exec(self, feedback_fn):
3322     """Rename the cluster.
3323
3324     """
3325     clustername = self.op.name
3326     ip = self.ip
3327
3328     # shutdown the master IP
3329     master = self.cfg.GetMasterNode()
3330     result = self.rpc.call_node_deactivate_master_ip(master)
3331     result.Raise("Could not disable the master role")
3332
3333     try:
3334       cluster = self.cfg.GetClusterInfo()
3335       cluster.cluster_name = clustername
3336       cluster.master_ip = ip
3337       self.cfg.Update(cluster, feedback_fn)
3338
3339       # update the known hosts file
3340       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3341       node_list = self.cfg.GetOnlineNodeList()
3342       try:
3343         node_list.remove(master)
3344       except ValueError:
3345         pass
3346       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3347     finally:
3348       result = self.rpc.call_node_activate_master_ip(master)
3349       msg = result.fail_msg
3350       if msg:
3351         self.LogWarning("Could not re-enable the master role on"
3352                         " the master, please restart manually: %s", msg)
3353
3354     return clustername
3355
3356
3357 def _ValidateNetmask(cfg, netmask):
3358   """Checks if a netmask is valid.
3359
3360   @type cfg: L{config.ConfigWriter}
3361   @param cfg: The cluster configuration
3362   @type netmask: int
3363   @param netmask: the netmask to be verified
3364   @raise errors.OpPrereqError: if the validation fails
3365
3366   """
3367   ip_family = cfg.GetPrimaryIPFamily()
3368   try:
3369     ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3370   except errors.ProgrammerError:
3371     raise errors.OpPrereqError("Invalid primary ip family: %s." %
3372                                ip_family)
3373   if not ipcls.ValidateNetmask(netmask):
3374     raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3375                                 (netmask))
3376
3377
3378 class LUClusterSetParams(LogicalUnit):
3379   """Change the parameters of the cluster.
3380
3381   """
3382   HPATH = "cluster-modify"
3383   HTYPE = constants.HTYPE_CLUSTER
3384   REQ_BGL = False
3385
3386   def CheckArguments(self):
3387     """Check parameters
3388
3389     """
3390     if self.op.uid_pool:
3391       uidpool.CheckUidPool(self.op.uid_pool)
3392
3393     if self.op.add_uids:
3394       uidpool.CheckUidPool(self.op.add_uids)
3395
3396     if self.op.remove_uids:
3397       uidpool.CheckUidPool(self.op.remove_uids)
3398
3399     if self.op.master_netmask is not None:
3400       _ValidateNetmask(self.cfg, self.op.master_netmask)
3401
3402   def ExpandNames(self):
3403     # FIXME: in the future maybe other cluster params won't require checking on
3404     # all nodes to be modified.
3405     self.needed_locks = {
3406       locking.LEVEL_NODE: locking.ALL_SET,
3407     }
3408     self.share_locks[locking.LEVEL_NODE] = 1
3409
3410   def BuildHooksEnv(self):
3411     """Build hooks env.
3412
3413     """
3414     return {
3415       "OP_TARGET": self.cfg.GetClusterName(),
3416       "NEW_VG_NAME": self.op.vg_name,
3417       }
3418
3419   def BuildHooksNodes(self):
3420     """Build hooks nodes.
3421
3422     """
3423     mn = self.cfg.GetMasterNode()
3424     return ([mn], [mn])
3425
3426   def CheckPrereq(self):
3427     """Check prerequisites.
3428
3429     This checks whether the given params don't conflict and
3430     if the given volume group is valid.
3431
3432     """
3433     if self.op.vg_name is not None and not self.op.vg_name:
3434       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3435         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3436                                    " instances exist", errors.ECODE_INVAL)
3437
3438     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3439       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3440         raise errors.OpPrereqError("Cannot disable drbd helper while"
3441                                    " drbd-based instances exist",
3442                                    errors.ECODE_INVAL)
3443
3444     node_list = self.owned_locks(locking.LEVEL_NODE)
3445
3446     # if vg_name not None, checks given volume group on all nodes
3447     if self.op.vg_name:
3448       vglist = self.rpc.call_vg_list(node_list)
3449       for node in node_list:
3450         msg = vglist[node].fail_msg
3451         if msg:
3452           # ignoring down node
3453           self.LogWarning("Error while gathering data on node %s"
3454                           " (ignoring node): %s", node, msg)
3455           continue
3456         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3457                                               self.op.vg_name,
3458                                               constants.MIN_VG_SIZE)
3459         if vgstatus:
3460           raise errors.OpPrereqError("Error on node '%s': %s" %
3461                                      (node, vgstatus), errors.ECODE_ENVIRON)
3462
3463     if self.op.drbd_helper:
3464       # checks given drbd helper on all nodes
3465       helpers = self.rpc.call_drbd_helper(node_list)
3466       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3467         if ninfo.offline:
3468           self.LogInfo("Not checking drbd helper on offline node %s", node)
3469           continue
3470         msg = helpers[node].fail_msg
3471         if msg:
3472           raise errors.OpPrereqError("Error checking drbd helper on node"
3473                                      " '%s': %s" % (node, msg),
3474                                      errors.ECODE_ENVIRON)
3475         node_helper = helpers[node].payload
3476         if node_helper != self.op.drbd_helper:
3477           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3478                                      (node, node_helper), errors.ECODE_ENVIRON)
3479
3480     self.cluster = cluster = self.cfg.GetClusterInfo()
3481     # validate params changes
3482     if self.op.beparams:
3483       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3484       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3485
3486     if self.op.ndparams:
3487       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3488       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3489
3490       # TODO: we need a more general way to handle resetting
3491       # cluster-level parameters to default values
3492       if self.new_ndparams["oob_program"] == "":
3493         self.new_ndparams["oob_program"] = \
3494             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3495
3496     if self.op.nicparams:
3497       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3498       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3499       objects.NIC.CheckParameterSyntax(self.new_nicparams)
3500       nic_errors = []
3501
3502       # check all instances for consistency
3503       for instance in self.cfg.GetAllInstancesInfo().values():
3504         for nic_idx, nic in enumerate(instance.nics):
3505           params_copy = copy.deepcopy(nic.nicparams)
3506           params_filled = objects.FillDict(self.new_nicparams, params_copy)
3507
3508           # check parameter syntax
3509           try:
3510             objects.NIC.CheckParameterSyntax(params_filled)
3511           except errors.ConfigurationError, err:
3512             nic_errors.append("Instance %s, nic/%d: %s" %
3513                               (instance.name, nic_idx, err))
3514
3515           # if we're moving instances to routed, check that they have an ip
3516           target_mode = params_filled[constants.NIC_MODE]
3517           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3518             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3519                               " address" % (instance.name, nic_idx))
3520       if nic_errors:
3521         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3522                                    "\n".join(nic_errors))
3523
3524     # hypervisor list/parameters
3525     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3526     if self.op.hvparams:
3527       for hv_name, hv_dict in self.op.hvparams.items():
3528         if hv_name not in self.new_hvparams:
3529           self.new_hvparams[hv_name] = hv_dict
3530         else:
3531           self.new_hvparams[hv_name].update(hv_dict)
3532
3533     # os hypervisor parameters
3534     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3535     if self.op.os_hvp:
3536       for os_name, hvs in self.op.os_hvp.items():
3537         if os_name not in self.new_os_hvp:
3538           self.new_os_hvp[os_name] = hvs
3539         else:
3540           for hv_name, hv_dict in hvs.items():
3541             if hv_name not in self.new_os_hvp[os_name]:
3542               self.new_os_hvp[os_name][hv_name] = hv_dict
3543             else:
3544               self.new_os_hvp[os_name][hv_name].update(hv_dict)
3545
3546     # os parameters
3547     self.new_osp = objects.FillDict(cluster.osparams, {})
3548     if self.op.osparams:
3549       for os_name, osp in self.op.osparams.items():
3550         if os_name not in self.new_osp:
3551           self.new_osp[os_name] = {}
3552
3553         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3554                                                   use_none=True)
3555
3556         if not self.new_osp[os_name]:
3557           # we removed all parameters
3558           del self.new_osp[os_name]
3559         else:
3560           # check the parameter validity (remote check)
3561           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3562                          os_name, self.new_osp[os_name])
3563
3564     # changes to the hypervisor list
3565     if self.op.enabled_hypervisors is not None:
3566       self.hv_list = self.op.enabled_hypervisors
3567       for hv in self.hv_list:
3568         # if the hypervisor doesn't already exist in the cluster
3569         # hvparams, we initialize it to empty, and then (in both
3570         # cases) we make sure to fill the defaults, as we might not
3571         # have a complete defaults list if the hypervisor wasn't
3572         # enabled before
3573         if hv not in new_hvp:
3574           new_hvp[hv] = {}
3575         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3576         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3577     else:
3578       self.hv_list = cluster.enabled_hypervisors
3579
3580     if self.op.hvparams or self.op.enabled_hypervisors is not None:
3581       # either the enabled list has changed, or the parameters have, validate
3582       for hv_name, hv_params in self.new_hvparams.items():
3583         if ((self.op.hvparams and hv_name in self.op.hvparams) or
3584             (self.op.enabled_hypervisors and
3585              hv_name in self.op.enabled_hypervisors)):
3586           # either this is a new hypervisor, or its parameters have changed
3587           hv_class = hypervisor.GetHypervisor(hv_name)
3588           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3589           hv_class.CheckParameterSyntax(hv_params)
3590           _CheckHVParams(self, node_list, hv_name, hv_params)
3591
3592     if self.op.os_hvp:
3593       # no need to check any newly-enabled hypervisors, since the
3594       # defaults have already been checked in the above code-block
3595       for os_name, os_hvp in self.new_os_hvp.items():
3596         for hv_name, hv_params in os_hvp.items():
3597           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3598           # we need to fill in the new os_hvp on top of the actual hv_p
3599           cluster_defaults = self.new_hvparams.get(hv_name, {})
3600           new_osp = objects.FillDict(cluster_defaults, hv_params)
3601           hv_class = hypervisor.GetHypervisor(hv_name)
3602           hv_class.CheckParameterSyntax(new_osp)
3603           _CheckHVParams(self, node_list, hv_name, new_osp)
3604
3605     if self.op.default_iallocator:
3606       alloc_script = utils.FindFile(self.op.default_iallocator,
3607                                     constants.IALLOCATOR_SEARCH_PATH,
3608                                     os.path.isfile)
3609       if alloc_script is None:
3610         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3611                                    " specified" % self.op.default_iallocator,
3612                                    errors.ECODE_INVAL)
3613
3614   def Exec(self, feedback_fn):
3615     """Change the parameters of the cluster.
3616
3617     """
3618     if self.op.vg_name is not None:
3619       new_volume = self.op.vg_name
3620       if not new_volume:
3621         new_volume = None
3622       if new_volume != self.cfg.GetVGName():
3623         self.cfg.SetVGName(new_volume)
3624       else:
3625         feedback_fn("Cluster LVM configuration already in desired"
3626                     " state, not changing")
3627     if self.op.drbd_helper is not None:
3628       new_helper = self.op.drbd_helper
3629       if not new_helper:
3630         new_helper = None
3631       if new_helper != self.cfg.GetDRBDHelper():
3632         self.cfg.SetDRBDHelper(new_helper)
3633       else:
3634         feedback_fn("Cluster DRBD helper already in desired state,"
3635                     " not changing")
3636     if self.op.hvparams:
3637       self.cluster.hvparams = self.new_hvparams
3638     if self.op.os_hvp:
3639       self.cluster.os_hvp = self.new_os_hvp
3640     if self.op.enabled_hypervisors is not None:
3641       self.cluster.hvparams = self.new_hvparams
3642       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3643     if self.op.beparams:
3644       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3645     if self.op.nicparams:
3646       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3647     if self.op.osparams:
3648       self.cluster.osparams = self.new_osp
3649     if self.op.ndparams:
3650       self.cluster.ndparams = self.new_ndparams
3651
3652     if self.op.candidate_pool_size is not None:
3653       self.cluster.candidate_pool_size = self.op.candidate_pool_size
3654       # we need to update the pool size here, otherwise the save will fail
3655       _AdjustCandidatePool(self, [])
3656
3657     if self.op.maintain_node_health is not None:
3658       self.cluster.maintain_node_health = self.op.maintain_node_health
3659
3660     if self.op.prealloc_wipe_disks is not None:
3661       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3662
3663     if self.op.add_uids is not None:
3664       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3665
3666     if self.op.remove_uids is not None:
3667       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3668
3669     if self.op.uid_pool is not None:
3670       self.cluster.uid_pool = self.op.uid_pool
3671
3672     if self.op.default_iallocator is not None:
3673       self.cluster.default_iallocator = self.op.default_iallocator
3674
3675     if self.op.reserved_lvs is not None:
3676       self.cluster.reserved_lvs = self.op.reserved_lvs
3677
3678     def helper_os(aname, mods, desc):
3679       desc += " OS list"
3680       lst = getattr(self.cluster, aname)
3681       for key, val in mods:
3682         if key == constants.DDM_ADD:
3683           if val in lst:
3684             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3685           else:
3686             lst.append(val)
3687         elif key == constants.DDM_REMOVE:
3688           if val in lst:
3689             lst.remove(val)
3690           else:
3691             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3692         else:
3693           raise errors.ProgrammerError("Invalid modification '%s'" % key)
3694
3695     if self.op.hidden_os:
3696       helper_os("hidden_os", self.op.hidden_os, "hidden")
3697
3698     if self.op.blacklisted_os:
3699       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3700
3701     if self.op.master_netdev:
3702       master = self.cfg.GetMasterNode()
3703       feedback_fn("Shutting down master ip on the current netdev (%s)" %
3704                   self.cluster.master_netdev)
3705       result = self.rpc.call_node_deactivate_master_ip(master)
3706       result.Raise("Could not disable the master ip")
3707       feedback_fn("Changing master_netdev from %s to %s" %
3708                   (self.cluster.master_netdev, self.op.master_netdev))
3709       self.cluster.master_netdev = self.op.master_netdev
3710
3711     if self.op.master_netmask:
3712       master = self.cfg.GetMasterNode()
3713       feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3714       result = self.rpc.call_node_change_master_netmask(master,
3715                                                         self.op.master_netmask)
3716       if result.fail_msg:
3717         msg = "Could not change the master IP netmask: %s" % result.fail_msg
3718         self.LogWarning(msg)
3719         feedback_fn(msg)
3720       else:
3721         self.cluster.master_netmask = self.op.master_netmask
3722
3723     self.cfg.Update(self.cluster, feedback_fn)
3724
3725     if self.op.master_netdev:
3726       feedback_fn("Starting the master ip on the new master netdev (%s)" %
3727                   self.op.master_netdev)
3728       result = self.rpc.call_node_activate_master_ip(master)
3729       if result.fail_msg:
3730         self.LogWarning("Could not re-enable the master ip on"
3731                         " the master, please restart manually: %s",
3732                         result.fail_msg)
3733
3734
3735 def _UploadHelper(lu, nodes, fname):
3736   """Helper for uploading a file and showing warnings.
3737
3738   """
3739   if os.path.exists(fname):
3740     result = lu.rpc.call_upload_file(nodes, fname)
3741     for to_node, to_result in result.items():
3742       msg = to_result.fail_msg
3743       if msg:
3744         msg = ("Copy of file %s to node %s failed: %s" %
3745                (fname, to_node, msg))
3746         lu.proc.LogWarning(msg)
3747
3748
3749 def _ComputeAncillaryFiles(cluster, redist):
3750   """Compute files external to Ganeti which need to be consistent.
3751
3752   @type redist: boolean
3753   @param redist: Whether to include files which need to be redistributed
3754
3755   """
3756   # Compute files for all nodes
3757   files_all = set([
3758     constants.SSH_KNOWN_HOSTS_FILE,
3759     constants.CONFD_HMAC_KEY,
3760     constants.CLUSTER_DOMAIN_SECRET_FILE,
3761     constants.SPICE_CERT_FILE,
3762     constants.SPICE_CACERT_FILE,
3763     ])
3764
3765   if not redist:
3766     files_all.update(constants.ALL_CERT_FILES)
3767     files_all.update(ssconf.SimpleStore().GetFileList())
3768   else:
3769     # we need to ship at least the RAPI certificate
3770     files_all.add(constants.RAPI_CERT_FILE)
3771
3772   if cluster.modify_etc_hosts:
3773     files_all.add(constants.ETC_HOSTS)
3774
3775   # Files which must either exist on all nodes or on none
3776   files_all_opt = set([
3777     constants.RAPI_USERS_FILE,
3778     ])
3779
3780   # Files which should only be on master candidates
3781   files_mc = set()
3782   if not redist:
3783     files_mc.add(constants.CLUSTER_CONF_FILE)
3784
3785   # Files which should only be on VM-capable nodes
3786   files_vm = set(filename
3787     for hv_name in cluster.enabled_hypervisors
3788     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3789
3790   # Filenames must be unique
3791   assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3792           sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3793          "Found file listed in more than one file list"
3794
3795   return (files_all, files_all_opt, files_mc, files_vm)
3796
3797
3798 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3799   """Distribute additional files which are part of the cluster configuration.
3800
3801   ConfigWriter takes care of distributing the config and ssconf files, but
3802   there are more files which should be distributed to all nodes. This function
3803   makes sure those are copied.
3804
3805   @param lu: calling logical unit
3806   @param additional_nodes: list of nodes not in the config to distribute to
3807   @type additional_vm: boolean
3808   @param additional_vm: whether the additional nodes are vm-capable or not
3809
3810   """
3811   # Gather target nodes
3812   cluster = lu.cfg.GetClusterInfo()
3813   master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3814
3815   online_nodes = lu.cfg.GetOnlineNodeList()
3816   vm_nodes = lu.cfg.GetVmCapableNodeList()
3817
3818   if additional_nodes is not None:
3819     online_nodes.extend(additional_nodes)
3820     if additional_vm:
3821       vm_nodes.extend(additional_nodes)
3822
3823   # Never distribute to master node
3824   for nodelist in [online_nodes, vm_nodes]:
3825     if master_info.name in nodelist:
3826       nodelist.remove(master_info.name)
3827
3828   # Gather file lists
3829   (files_all, files_all_opt, files_mc, files_vm) = \
3830     _ComputeAncillaryFiles(cluster, True)
3831
3832   # Never re-distribute configuration file from here
3833   assert not (constants.CLUSTER_CONF_FILE in files_all or
3834               constants.CLUSTER_CONF_FILE in files_vm)
3835   assert not files_mc, "Master candidates not handled in this function"
3836
3837   filemap = [
3838     (online_nodes, files_all),
3839     (online_nodes, files_all_opt),
3840     (vm_nodes, files_vm),
3841     ]
3842
3843   # Upload the files
3844   for (node_list, files) in filemap:
3845     for fname in files:
3846       _UploadHelper(lu, node_list, fname)
3847
3848
3849 class LUClusterRedistConf(NoHooksLU):
3850   """Force the redistribution of cluster configuration.
3851
3852   This is a very simple LU.
3853
3854   """
3855   REQ_BGL = False
3856
3857   def ExpandNames(self):
3858     self.needed_locks = {
3859       locking.LEVEL_NODE: locking.ALL_SET,
3860     }
3861     self.share_locks[locking.LEVEL_NODE] = 1
3862
3863   def Exec(self, feedback_fn):
3864     """Redistribute the configuration.
3865
3866     """
3867     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3868     _RedistributeAncillaryFiles(self)
3869
3870
3871 class LUClusterActivateMasterIp(NoHooksLU):
3872   """Activate the master IP on the master node.
3873
3874   """
3875   def Exec(self, feedback_fn):
3876     """Activate the master IP.
3877
3878     """
3879     master = self.cfg.GetMasterNode()
3880     self.rpc.call_node_activate_master_ip(master)
3881
3882
3883 class LUClusterDeactivateMasterIp(NoHooksLU):
3884   """Deactivate the master IP on the master node.
3885
3886   """
3887   def Exec(self, feedback_fn):
3888     """Deactivate the master IP.
3889
3890     """
3891     master = self.cfg.GetMasterNode()
3892     self.rpc.call_node_deactivate_master_ip(master)
3893
3894
3895 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3896   """Sleep and poll for an instance's disk to sync.
3897
3898   """
3899   if not instance.disks or disks is not None and not disks:
3900     return True
3901
3902   disks = _ExpandCheckDisks(instance, disks)
3903
3904   if not oneshot:
3905     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3906
3907   node = instance.primary_node
3908
3909   for dev in disks:
3910     lu.cfg.SetDiskID(dev, node)
3911
3912   # TODO: Convert to utils.Retry
3913
3914   retries = 0
3915   degr_retries = 10 # in seconds, as we sleep 1 second each time
3916   while True:
3917     max_time = 0
3918     done = True
3919     cumul_degraded = False
3920     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3921     msg = rstats.fail_msg
3922     if msg:
3923       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3924       retries += 1
3925       if retries >= 10:
3926         raise errors.RemoteError("Can't contact node %s for mirror data,"
3927                                  " aborting." % node)
3928       time.sleep(6)
3929       continue
3930     rstats = rstats.payload
3931     retries = 0
3932     for i, mstat in enumerate(rstats):
3933       if mstat is None:
3934         lu.LogWarning("Can't compute data for node %s/%s",
3935                            node, disks[i].iv_name)
3936         continue
3937
3938       cumul_degraded = (cumul_degraded or
3939                         (mstat.is_degraded and mstat.sync_percent is None))
3940       if mstat.sync_percent is not None:
3941         done = False
3942         if mstat.estimated_time is not None:
3943           rem_time = ("%s remaining (estimated)" %
3944                       utils.FormatSeconds(mstat.estimated_time))
3945           max_time = mstat.estimated_time
3946         else:
3947           rem_time = "no time estimate"
3948         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3949                         (disks[i].iv_name, mstat.sync_percent, rem_time))
3950
3951     # if we're done but degraded, let's do a few small retries, to
3952     # make sure we see a stable and not transient situation; therefore
3953     # we force restart of the loop
3954     if (done or oneshot) and cumul_degraded and degr_retries > 0:
3955       logging.info("Degraded disks found, %d retries left", degr_retries)
3956       degr_retries -= 1
3957       time.sleep(1)
3958       continue
3959
3960     if done or oneshot:
3961       break
3962
3963     time.sleep(min(60, max_time))
3964
3965   if done:
3966     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3967   return not cumul_degraded
3968
3969
3970 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3971   """Check that mirrors are not degraded.
3972
3973   The ldisk parameter, if True, will change the test from the
3974   is_degraded attribute (which represents overall non-ok status for
3975   the device(s)) to the ldisk (representing the local storage status).
3976
3977   """
3978   lu.cfg.SetDiskID(dev, node)
3979
3980   result = True
3981
3982   if on_primary or dev.AssembleOnSecondary():
3983     rstats = lu.rpc.call_blockdev_find(node, dev)
3984     msg = rstats.fail_msg
3985     if msg:
3986       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3987       result = False
3988     elif not rstats.payload:
3989       lu.LogWarning("Can't find disk on node %s", node)
3990       result = False
3991     else:
3992       if ldisk:
3993         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3994       else:
3995         result = result and not rstats.payload.is_degraded
3996
3997   if dev.children:
3998     for child in dev.children:
3999       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4000
4001   return result
4002
4003
4004 class LUOobCommand(NoHooksLU):
4005   """Logical unit for OOB handling.
4006
4007   """
4008   REG_BGL = False
4009   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4010
4011   def ExpandNames(self):
4012     """Gather locks we need.
4013
4014     """
4015     if self.op.node_names:
4016       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4017       lock_names = self.op.node_names
4018     else:
4019       lock_names = locking.ALL_SET
4020
4021     self.needed_locks = {
4022       locking.LEVEL_NODE: lock_names,
4023       }
4024
4025   def CheckPrereq(self):
4026     """Check prerequisites.
4027
4028     This checks:
4029      - the node exists in the configuration
4030      - OOB is supported
4031
4032     Any errors are signaled by raising errors.OpPrereqError.
4033
4034     """
4035     self.nodes = []
4036     self.master_node = self.cfg.GetMasterNode()
4037
4038     assert self.op.power_delay >= 0.0
4039
4040     if self.op.node_names:
4041       if (self.op.command in self._SKIP_MASTER and
4042           self.master_node in self.op.node_names):
4043         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4044         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4045
4046         if master_oob_handler:
4047           additional_text = ("run '%s %s %s' if you want to operate on the"
4048                              " master regardless") % (master_oob_handler,
4049                                                       self.op.command,
4050                                                       self.master_node)
4051         else:
4052           additional_text = "it does not support out-of-band operations"
4053
4054         raise errors.OpPrereqError(("Operating on the master node %s is not"
4055                                     " allowed for %s; %s") %
4056                                    (self.master_node, self.op.command,
4057                                     additional_text), errors.ECODE_INVAL)
4058     else:
4059       self.op.node_names = self.cfg.GetNodeList()
4060       if self.op.command in self._SKIP_MASTER:
4061         self.op.node_names.remove(self.master_node)
4062
4063     if self.op.command in self._SKIP_MASTER:
4064       assert self.master_node not in self.op.node_names
4065
4066     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4067       if node is None:
4068         raise errors.OpPrereqError("Node %s not found" % node_name,
4069                                    errors.ECODE_NOENT)
4070       else:
4071         self.nodes.append(node)
4072
4073       if (not self.op.ignore_status and
4074           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4075         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4076                                     " not marked offline") % node_name,
4077                                    errors.ECODE_STATE)
4078
4079   def Exec(self, feedback_fn):
4080     """Execute OOB and return result if we expect any.
4081
4082     """
4083     master_node = self.master_node
4084     ret = []
4085
4086     for idx, node in enumerate(utils.NiceSort(self.nodes,
4087                                               key=lambda node: node.name)):
4088       node_entry = [(constants.RS_NORMAL, node.name)]
4089       ret.append(node_entry)
4090
4091       oob_program = _SupportsOob(self.cfg, node)
4092
4093       if not oob_program:
4094         node_entry.append((constants.RS_UNAVAIL, None))
4095         continue
4096
4097       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4098                    self.op.command, oob_program, node.name)
4099       result = self.rpc.call_run_oob(master_node, oob_program,
4100                                      self.op.command, node.name,
4101                                      self.op.timeout)
4102
4103       if result.fail_msg:
4104         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4105                         node.name, result.fail_msg)
4106         node_entry.append((constants.RS_NODATA, None))
4107       else:
4108         try:
4109           self._CheckPayload(result)
4110         except errors.OpExecError, err:
4111           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4112                           node.name, err)
4113           node_entry.append((constants.RS_NODATA, None))
4114         else:
4115           if self.op.command == constants.OOB_HEALTH:
4116             # For health we should log important events
4117             for item, status in result.payload:
4118               if status in [constants.OOB_STATUS_WARNING,
4119                             constants.OOB_STATUS_CRITICAL]:
4120                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4121                                 item, node.name, status)
4122
4123           if self.op.command == constants.OOB_POWER_ON:
4124             node.powered = True
4125           elif self.op.command == constants.OOB_POWER_OFF:
4126             node.powered = False
4127           elif self.op.command == constants.OOB_POWER_STATUS:
4128             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4129             if powered != node.powered:
4130               logging.warning(("Recorded power state (%s) of node '%s' does not"
4131                                " match actual power state (%s)"), node.powered,
4132                               node.name, powered)
4133
4134           # For configuration changing commands we should update the node
4135           if self.op.command in (constants.OOB_POWER_ON,
4136                                  constants.OOB_POWER_OFF):
4137             self.cfg.Update(node, feedback_fn)
4138
4139           node_entry.append((constants.RS_NORMAL, result.payload))
4140
4141           if (self.op.command == constants.OOB_POWER_ON and
4142               idx < len(self.nodes) - 1):
4143             time.sleep(self.op.power_delay)
4144
4145     return ret
4146
4147   def _CheckPayload(self, result):
4148     """Checks if the payload is valid.
4149
4150     @param result: RPC result
4151     @raises errors.OpExecError: If payload is not valid
4152
4153     """
4154     errs = []
4155     if self.op.command == constants.OOB_HEALTH:
4156       if not isinstance(result.payload, list):
4157         errs.append("command 'health' is expected to return a list but got %s" %
4158                     type(result.payload))
4159       else:
4160         for item, status in result.payload:
4161           if status not in constants.OOB_STATUSES:
4162             errs.append("health item '%s' has invalid status '%s'" %
4163                         (item, status))
4164
4165     if self.op.command == constants.OOB_POWER_STATUS:
4166       if not isinstance(result.payload, dict):
4167         errs.append("power-status is expected to return a dict but got %s" %
4168                     type(result.payload))
4169
4170     if self.op.command in [
4171         constants.OOB_POWER_ON,
4172         constants.OOB_POWER_OFF,
4173         constants.OOB_POWER_CYCLE,
4174         ]:
4175       if result.payload is not None:
4176         errs.append("%s is expected to not return payload but got '%s'" %
4177                     (self.op.command, result.payload))
4178
4179     if errs:
4180       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4181                                utils.CommaJoin(errs))
4182
4183
4184 class _OsQuery(_QueryBase):
4185   FIELDS = query.OS_FIELDS
4186
4187   def ExpandNames(self, lu):
4188     # Lock all nodes in shared mode
4189     # Temporary removal of locks, should be reverted later
4190     # TODO: reintroduce locks when they are lighter-weight
4191     lu.needed_locks = {}
4192     #self.share_locks[locking.LEVEL_NODE] = 1
4193     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4194
4195     # The following variables interact with _QueryBase._GetNames
4196     if self.names:
4197       self.wanted = self.names
4198     else:
4199       self.wanted = locking.ALL_SET
4200
4201     self.do_locking = self.use_locking
4202
4203   def DeclareLocks(self, lu, level):
4204     pass
4205
4206   @staticmethod
4207   def _DiagnoseByOS(rlist):
4208     """Remaps a per-node return list into an a per-os per-node dictionary
4209
4210     @param rlist: a map with node names as keys and OS objects as values
4211
4212     @rtype: dict
4213     @return: a dictionary with osnames as keys and as value another
4214         map, with nodes as keys and tuples of (path, status, diagnose,
4215         variants, parameters, api_versions) as values, eg::
4216
4217           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4218                                      (/srv/..., False, "invalid api")],
4219                            "node2": [(/srv/..., True, "", [], [])]}
4220           }
4221
4222     """
4223     all_os = {}
4224     # we build here the list of nodes that didn't fail the RPC (at RPC
4225     # level), so that nodes with a non-responding node daemon don't
4226     # make all OSes invalid
4227     good_nodes = [node_name for node_name in rlist
4228                   if not rlist[node_name].fail_msg]
4229     for node_name, nr in rlist.items():
4230       if nr.fail_msg or not nr.payload:
4231         continue
4232       for (name, path, status, diagnose, variants,
4233            params, api_versions) in nr.payload:
4234         if name not in all_os:
4235           # build a list of nodes for this os containing empty lists
4236           # for each node in node_list
4237           all_os[name] = {}
4238           for nname in good_nodes:
4239             all_os[name][nname] = []
4240         # convert params from [name, help] to (name, help)
4241         params = [tuple(v) for v in params]
4242         all_os[name][node_name].append((path, status, diagnose,
4243                                         variants, params, api_versions))
4244     return all_os
4245
4246   def _GetQueryData(self, lu):
4247     """Computes the list of nodes and their attributes.
4248
4249     """
4250     # Locking is not used
4251     assert not (compat.any(lu.glm.is_owned(level)
4252                            for level in locking.LEVELS
4253                            if level != locking.LEVEL_CLUSTER) or
4254                 self.do_locking or self.use_locking)
4255
4256     valid_nodes = [node.name
4257                    for node in lu.cfg.GetAllNodesInfo().values()
4258                    if not node.offline and node.vm_capable]
4259     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4260     cluster = lu.cfg.GetClusterInfo()
4261
4262     data = {}
4263
4264     for (os_name, os_data) in pol.items():
4265       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4266                           hidden=(os_name in cluster.hidden_os),
4267                           blacklisted=(os_name in cluster.blacklisted_os))
4268
4269       variants = set()
4270       parameters = set()
4271       api_versions = set()
4272
4273       for idx, osl in enumerate(os_data.values()):
4274         info.valid = bool(info.valid and osl and osl[0][1])
4275         if not info.valid:
4276           break
4277
4278         (node_variants, node_params, node_api) = osl[0][3:6]
4279         if idx == 0:
4280           # First entry
4281           variants.update(node_variants)
4282           parameters.update(node_params)
4283           api_versions.update(node_api)
4284         else:
4285           # Filter out inconsistent values
4286           variants.intersection_update(node_variants)
4287           parameters.intersection_update(node_params)
4288           api_versions.intersection_update(node_api)
4289
4290       info.variants = list(variants)
4291       info.parameters = list(parameters)
4292       info.api_versions = list(api_versions)
4293
4294       data[os_name] = info
4295
4296     # Prepare data in requested order
4297     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4298             if name in data]
4299
4300
4301 class LUOsDiagnose(NoHooksLU):
4302   """Logical unit for OS diagnose/query.
4303
4304   """
4305   REQ_BGL = False
4306
4307   @staticmethod
4308   def _BuildFilter(fields, names):
4309     """Builds a filter for querying OSes.
4310
4311     """
4312     name_filter = qlang.MakeSimpleFilter("name", names)
4313
4314     # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4315     # respective field is not requested
4316     status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4317                      for fname in ["hidden", "blacklisted"]
4318                      if fname not in fields]
4319     if "valid" not in fields:
4320       status_filter.append([qlang.OP_TRUE, "valid"])
4321
4322     if status_filter:
4323       status_filter.insert(0, qlang.OP_AND)
4324     else:
4325       status_filter = None
4326
4327     if name_filter and status_filter:
4328       return [qlang.OP_AND, name_filter, status_filter]
4329     elif name_filter:
4330       return name_filter
4331     else:
4332       return status_filter
4333
4334   def CheckArguments(self):
4335     self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4336                        self.op.output_fields, False)
4337
4338   def ExpandNames(self):
4339     self.oq.ExpandNames(self)
4340
4341   def Exec(self, feedback_fn):
4342     return self.oq.OldStyleQuery(self)
4343
4344
4345 class LUNodeRemove(LogicalUnit):
4346   """Logical unit for removing a node.
4347
4348   """
4349   HPATH = "node-remove"
4350   HTYPE = constants.HTYPE_NODE
4351
4352   def BuildHooksEnv(self):
4353     """Build hooks env.
4354
4355     This doesn't run on the target node in the pre phase as a failed
4356     node would then be impossible to remove.
4357
4358     """
4359     return {
4360       "OP_TARGET": self.op.node_name,
4361       "NODE_NAME": self.op.node_name,
4362       }
4363
4364   def BuildHooksNodes(self):
4365     """Build hooks nodes.
4366
4367     """
4368     all_nodes = self.cfg.GetNodeList()
4369     try:
4370       all_nodes.remove(self.op.node_name)
4371     except ValueError:
4372       logging.warning("Node '%s', which is about to be removed, was not found"
4373                       " in the list of all nodes", self.op.node_name)
4374     return (all_nodes, all_nodes)
4375
4376   def CheckPrereq(self):
4377     """Check prerequisites.
4378
4379     This checks:
4380      - the node exists in the configuration
4381      - it does not have primary or secondary instances
4382      - it's not the master
4383
4384     Any errors are signaled by raising errors.OpPrereqError.
4385
4386     """
4387     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4388     node = self.cfg.GetNodeInfo(self.op.node_name)
4389     assert node is not None
4390
4391     masternode = self.cfg.GetMasterNode()
4392     if node.name == masternode:
4393       raise errors.OpPrereqError("Node is the master node, failover to another"
4394                                  " node is required", errors.ECODE_INVAL)
4395
4396     for instance_name, instance in self.cfg.GetAllInstancesInfo():
4397       if node.name in instance.all_nodes:
4398         raise errors.OpPrereqError("Instance %s is still running on the node,"
4399                                    " please remove first" % instance_name,
4400                                    errors.ECODE_INVAL)
4401     self.op.node_name = node.name
4402     self.node = node
4403
4404   def Exec(self, feedback_fn):
4405     """Removes the node from the cluster.
4406
4407     """
4408     node = self.node
4409     logging.info("Stopping the node daemon and removing configs from node %s",
4410                  node.name)
4411
4412     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4413
4414     # Promote nodes to master candidate as needed
4415     _AdjustCandidatePool(self, exceptions=[node.name])
4416     self.context.RemoveNode(node.name)
4417
4418     # Run post hooks on the node before it's removed
4419     _RunPostHook(self, node.name)
4420
4421     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4422     msg = result.fail_msg
4423     if msg:
4424       self.LogWarning("Errors encountered on the remote node while leaving"
4425                       " the cluster: %s", msg)
4426
4427     # Remove node from our /etc/hosts
4428     if self.cfg.GetClusterInfo().modify_etc_hosts:
4429       master_node = self.cfg.GetMasterNode()
4430       result = self.rpc.call_etc_hosts_modify(master_node,
4431                                               constants.ETC_HOSTS_REMOVE,
4432                                               node.name, None)
4433       result.Raise("Can't update hosts file with new host data")
4434       _RedistributeAncillaryFiles(self)
4435
4436
4437 class _NodeQuery(_QueryBase):
4438   FIELDS = query.NODE_FIELDS
4439
4440   def ExpandNames(self, lu):
4441     lu.needed_locks = {}
4442     lu.share_locks = _ShareAll()
4443
4444     if self.names:
4445       self.wanted = _GetWantedNodes(lu, self.names)
4446     else:
4447       self.wanted = locking.ALL_SET
4448
4449     self.do_locking = (self.use_locking and
4450                        query.NQ_LIVE in self.requested_data)
4451
4452     if self.do_locking:
4453       # If any non-static field is requested we need to lock the nodes
4454       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4455
4456   def DeclareLocks(self, lu, level):
4457     pass
4458
4459   def _GetQueryData(self, lu):
4460     """Computes the list of nodes and their attributes.
4461
4462     """
4463     all_info = lu.cfg.GetAllNodesInfo()
4464
4465     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4466
4467     # Gather data as requested
4468     if query.NQ_LIVE in self.requested_data:
4469       # filter out non-vm_capable nodes
4470       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4471
4472       node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4473                                         lu.cfg.GetHypervisorType())
4474       live_data = dict((name, nresult.payload)
4475                        for (name, nresult) in node_data.items()
4476                        if not nresult.fail_msg and nresult.payload)
4477     else:
4478       live_data = None
4479
4480     if query.NQ_INST in self.requested_data:
4481       node_to_primary = dict([(name, set()) for name in nodenames])
4482       node_to_secondary = dict([(name, set()) for name in nodenames])
4483
4484       inst_data = lu.cfg.GetAllInstancesInfo()
4485
4486       for inst in inst_data.values():
4487         if inst.primary_node in node_to_primary:
4488           node_to_primary[inst.primary_node].add(inst.name)
4489         for secnode in inst.secondary_nodes:
4490           if secnode in node_to_secondary:
4491             node_to_secondary[secnode].add(inst.name)
4492     else:
4493       node_to_primary = None
4494       node_to_secondary = None
4495
4496     if query.NQ_OOB in self.requested_data:
4497       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4498                          for name, node in all_info.iteritems())
4499     else:
4500       oob_support = None
4501
4502     if query.NQ_GROUP in self.requested_data:
4503       groups = lu.cfg.GetAllNodeGroupsInfo()
4504     else:
4505       groups = {}
4506
4507     return query.NodeQueryData([all_info[name] for name in nodenames],
4508                                live_data, lu.cfg.GetMasterNode(),
4509                                node_to_primary, node_to_secondary, groups,
4510                                oob_support, lu.cfg.GetClusterInfo())
4511
4512
4513 class LUNodeQuery(NoHooksLU):
4514   """Logical unit for querying nodes.
4515
4516   """
4517   # pylint: disable=W0142
4518   REQ_BGL = False
4519
4520   def CheckArguments(self):
4521     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4522                          self.op.output_fields, self.op.use_locking)
4523
4524   def ExpandNames(self):
4525     self.nq.ExpandNames(self)
4526
4527   def Exec(self, feedback_fn):
4528     return self.nq.OldStyleQuery(self)
4529
4530
4531 class LUNodeQueryvols(NoHooksLU):
4532   """Logical unit for getting volumes on node(s).
4533
4534   """
4535   REQ_BGL = False
4536   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4537   _FIELDS_STATIC = utils.FieldSet("node")
4538
4539   def CheckArguments(self):
4540     _CheckOutputFields(static=self._FIELDS_STATIC,
4541                        dynamic=self._FIELDS_DYNAMIC,
4542                        selected=self.op.output_fields)
4543
4544   def ExpandNames(self):
4545     self.needed_locks = {}
4546     self.share_locks[locking.LEVEL_NODE] = 1
4547     if not self.op.nodes:
4548       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4549     else:
4550       self.needed_locks[locking.LEVEL_NODE] = \
4551         _GetWantedNodes(self, self.op.nodes)
4552
4553   def Exec(self, feedback_fn):
4554     """Computes the list of nodes and their attributes.
4555
4556     """
4557     nodenames = self.owned_locks(locking.LEVEL_NODE)
4558     volumes = self.rpc.call_node_volumes(nodenames)
4559
4560     ilist = self.cfg.GetAllInstancesInfo()
4561     vol2inst = _MapInstanceDisksToNodes(ilist.values())
4562
4563     output = []
4564     for node in nodenames:
4565       nresult = volumes[node]
4566       if nresult.offline:
4567         continue
4568       msg = nresult.fail_msg
4569       if msg:
4570         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4571         continue
4572
4573       node_vols = sorted(nresult.payload,
4574                          key=operator.itemgetter("dev"))
4575
4576       for vol in node_vols:
4577         node_output = []
4578         for field in self.op.output_fields:
4579           if field == "node":
4580             val = node
4581           elif field == "phys":
4582             val = vol["dev"]
4583           elif field == "vg":
4584             val = vol["vg"]
4585           elif field == "name":
4586             val = vol["name"]
4587           elif field == "size":
4588             val = int(float(vol["size"]))
4589           elif field == "instance":
4590             val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4591           else:
4592             raise errors.ParameterError(field)
4593           node_output.append(str(val))
4594
4595         output.append(node_output)
4596
4597     return output
4598
4599
4600 class LUNodeQueryStorage(NoHooksLU):
4601   """Logical unit for getting information on storage units on node(s).
4602
4603   """
4604   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4605   REQ_BGL = False
4606
4607   def CheckArguments(self):
4608     _CheckOutputFields(static=self._FIELDS_STATIC,
4609                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4610                        selected=self.op.output_fields)
4611
4612   def ExpandNames(self):
4613     self.needed_locks = {}
4614     self.share_locks[locking.LEVEL_NODE] = 1
4615
4616     if self.op.nodes:
4617       self.needed_locks[locking.LEVEL_NODE] = \
4618         _GetWantedNodes(self, self.op.nodes)
4619     else:
4620       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4621
4622   def Exec(self, feedback_fn):
4623     """Computes the list of nodes and their attributes.
4624
4625     """
4626     self.nodes = self.owned_locks(locking.LEVEL_NODE)
4627
4628     # Always get name to sort by
4629     if constants.SF_NAME in self.op.output_fields:
4630       fields = self.op.output_fields[:]
4631     else:
4632       fields = [constants.SF_NAME] + self.op.output_fields
4633
4634     # Never ask for node or type as it's only known to the LU
4635     for extra in [constants.SF_NODE, constants.SF_TYPE]:
4636       while extra in fields:
4637         fields.remove(extra)
4638
4639     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4640     name_idx = field_idx[constants.SF_NAME]
4641
4642     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4643     data = self.rpc.call_storage_list(self.nodes,
4644                                       self.op.storage_type, st_args,
4645                                       self.op.name, fields)
4646
4647     result = []
4648
4649     for node in utils.NiceSort(self.nodes):
4650       nresult = data[node]
4651       if nresult.offline:
4652         continue
4653
4654       msg = nresult.fail_msg
4655       if msg:
4656         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4657         continue
4658
4659       rows = dict([(row[name_idx], row) for row in nresult.payload])
4660
4661       for name in utils.NiceSort(rows.keys()):
4662         row = rows[name]
4663
4664         out = []
4665
4666         for field in self.op.output_fields:
4667           if field == constants.SF_NODE:
4668             val = node
4669           elif field == constants.SF_TYPE:
4670             val = self.op.storage_type
4671           elif field in field_idx:
4672             val = row[field_idx[field]]
4673           else:
4674             raise errors.ParameterError(field)
4675
4676           out.append(val)
4677
4678         result.append(out)
4679
4680     return result
4681
4682
4683 class _InstanceQuery(_QueryBase):
4684   FIELDS = query.INSTANCE_FIELDS
4685
4686   def ExpandNames(self, lu):
4687     lu.needed_locks = {}
4688     lu.share_locks = _ShareAll()
4689
4690     if self.names:
4691       self.wanted = _GetWantedInstances(lu, self.names)
4692     else:
4693       self.wanted = locking.ALL_SET
4694
4695     self.do_locking = (self.use_locking and
4696                        query.IQ_LIVE in self.requested_data)
4697     if self.do_locking:
4698       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4699       lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4700       lu.needed_locks[locking.LEVEL_NODE] = []
4701       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4702
4703     self.do_grouplocks = (self.do_locking and
4704                           query.IQ_NODES in self.requested_data)
4705
4706   def DeclareLocks(self, lu, level):
4707     if self.do_locking:
4708       if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4709         assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4710
4711         # Lock all groups used by instances optimistically; this requires going
4712         # via the node before it's locked, requiring verification later on
4713         lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4714           set(group_uuid
4715               for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4716               for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4717       elif level == locking.LEVEL_NODE:
4718         lu._LockInstancesNodes() # pylint: disable=W0212
4719
4720   @staticmethod
4721   def _CheckGroupLocks(lu):
4722     owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4723     owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4724
4725     # Check if node groups for locked instances are still correct
4726     for instance_name in owned_instances:
4727       _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4728
4729   def _GetQueryData(self, lu):
4730     """Computes the list of instances and their attributes.
4731
4732     """
4733     if self.do_grouplocks:
4734       self._CheckGroupLocks(lu)
4735
4736     cluster = lu.cfg.GetClusterInfo()
4737     all_info = lu.cfg.GetAllInstancesInfo()
4738
4739     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4740
4741     instance_list = [all_info[name] for name in instance_names]
4742     nodes = frozenset(itertools.chain(*(inst.all_nodes
4743                                         for inst in instance_list)))
4744     hv_list = list(set([inst.hypervisor for inst in instance_list]))
4745     bad_nodes = []
4746     offline_nodes = []
4747     wrongnode_inst = set()
4748
4749     # Gather data as requested
4750     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4751       live_data = {}
4752       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4753       for name in nodes:
4754         result = node_data[name]
4755         if result.offline:
4756           # offline nodes will be in both lists
4757           assert result.fail_msg
4758           offline_nodes.append(name)
4759         if result.fail_msg:
4760           bad_nodes.append(name)
4761         elif result.payload:
4762           for inst in result.payload:
4763             if inst in all_info:
4764               if all_info[inst].primary_node == name:
4765                 live_data.update(result.payload)
4766               else:
4767                 wrongnode_inst.add(inst)
4768             else:
4769               # orphan instance; we don't list it here as we don't
4770               # handle this case yet in the output of instance listing
4771               logging.warning("Orphan instance '%s' found on node %s",
4772                               inst, name)
4773         # else no instance is alive
4774     else:
4775       live_data = {}
4776
4777     if query.IQ_DISKUSAGE in self.requested_data:
4778       disk_usage = dict((inst.name,
4779                          _ComputeDiskSize(inst.disk_template,
4780                                           [{constants.IDISK_SIZE: disk.size}
4781                                            for disk in inst.disks]))
4782                         for inst in instance_list)
4783     else:
4784       disk_usage = None
4785
4786     if query.IQ_CONSOLE in self.requested_data:
4787       consinfo = {}
4788       for inst in instance_list:
4789         if inst.name in live_data:
4790           # Instance is running
4791           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4792         else:
4793           consinfo[inst.name] = None
4794       assert set(consinfo.keys()) == set(instance_names)
4795     else:
4796       consinfo = None
4797
4798     if query.IQ_NODES in self.requested_data:
4799       node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4800                                             instance_list)))
4801       nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4802       groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4803                     for uuid in set(map(operator.attrgetter("group"),
4804                                         nodes.values())))
4805     else:
4806       nodes = None
4807       groups = None
4808
4809     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4810                                    disk_usage, offline_nodes, bad_nodes,
4811                                    live_data, wrongnode_inst, consinfo,
4812                                    nodes, groups)
4813
4814
4815 class LUQuery(NoHooksLU):
4816   """Query for resources/items of a certain kind.
4817
4818   """
4819   # pylint: disable=W0142
4820   REQ_BGL = False
4821
4822   def CheckArguments(self):
4823     qcls = _GetQueryImplementation(self.op.what)
4824
4825     self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
4826
4827   def ExpandNames(self):
4828     self.impl.ExpandNames(self)
4829
4830   def DeclareLocks(self, level):
4831     self.impl.DeclareLocks(self, level)
4832
4833   def Exec(self, feedback_fn):
4834     return self.impl.NewStyleQuery(self)
4835
4836
4837 class LUQueryFields(NoHooksLU):
4838   """Query for resources/items of a certain kind.
4839
4840   """
4841   # pylint: disable=W0142
4842   REQ_BGL = False
4843
4844   def CheckArguments(self):
4845     self.qcls = _GetQueryImplementation(self.op.what)
4846
4847   def ExpandNames(self):
4848     self.needed_locks = {}
4849
4850   def Exec(self, feedback_fn):
4851     return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4852
4853
4854 class LUNodeModifyStorage(NoHooksLU):
4855   """Logical unit for modifying a storage volume on a node.
4856
4857   """
4858   REQ_BGL = False
4859
4860   def CheckArguments(self):
4861     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4862
4863     storage_type = self.op.storage_type
4864
4865     try:
4866       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4867     except KeyError:
4868       raise errors.OpPrereqError("Storage units of type '%s' can not be"
4869                                  " modified" % storage_type,
4870                                  errors.ECODE_INVAL)
4871
4872     diff = set(self.op.changes.keys()) - modifiable
4873     if diff:
4874       raise errors.OpPrereqError("The following fields can not be modified for"
4875                                  " storage units of type '%s': %r" %
4876                                  (storage_type, list(diff)),
4877                                  errors.ECODE_INVAL)
4878
4879   def ExpandNames(self):
4880     self.needed_locks = {
4881       locking.LEVEL_NODE: self.op.node_name,
4882       }
4883
4884   def Exec(self, feedback_fn):
4885     """Computes the list of nodes and their attributes.
4886
4887     """
4888     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4889     result = self.rpc.call_storage_modify(self.op.node_name,
4890                                           self.op.storage_type, st_args,
4891                                           self.op.name, self.op.changes)
4892     result.Raise("Failed to modify storage unit '%s' on %s" %
4893                  (self.op.name, self.op.node_name))
4894
4895
4896 class LUNodeAdd(LogicalUnit):
4897   """Logical unit for adding node to the cluster.
4898
4899   """
4900   HPATH = "node-add"
4901   HTYPE = constants.HTYPE_NODE
4902   _NFLAGS = ["master_capable", "vm_capable"]
4903
4904   def CheckArguments(self):
4905     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4906     # validate/normalize the node name
4907     self.hostname = netutils.GetHostname(name=self.op.node_name,
4908                                          family=self.primary_ip_family)
4909     self.op.node_name = self.hostname.name
4910
4911     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4912       raise errors.OpPrereqError("Cannot readd the master node",
4913                                  errors.ECODE_STATE)
4914
4915     if self.op.readd and self.op.group:
4916       raise errors.OpPrereqError("Cannot pass a node group when a node is"
4917                                  " being readded", errors.ECODE_INVAL)
4918
4919   def BuildHooksEnv(self):
4920     """Build hooks env.
4921
4922     This will run on all nodes before, and on all nodes + the new node after.
4923
4924     """
4925     return {
4926       "OP_TARGET": self.op.node_name,
4927       "NODE_NAME": self.op.node_name,
4928       "NODE_PIP": self.op.primary_ip,
4929       "NODE_SIP": self.op.secondary_ip,
4930       "MASTER_CAPABLE": str(self.op.master_capable),
4931       "VM_CAPABLE": str(self.op.vm_capable),
4932       }
4933
4934   def BuildHooksNodes(self):
4935     """Build hooks nodes.
4936
4937     """
4938     # Exclude added node
4939     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4940     post_nodes = pre_nodes + [self.op.node_name, ]
4941
4942     return (pre_nodes, post_nodes)
4943
4944   def CheckPrereq(self):
4945     """Check prerequisites.
4946
4947     This checks:
4948      - the new node is not already in the config
4949      - it is resolvable
4950      - its parameters (single/dual homed) matches the cluster
4951
4952     Any errors are signaled by raising errors.OpPrereqError.
4953
4954     """
4955     cfg = self.cfg
4956     hostname = self.hostname
4957     node = hostname.name
4958     primary_ip = self.op.primary_ip = hostname.ip
4959     if self.op.secondary_ip is None:
4960       if self.primary_ip_family == netutils.IP6Address.family:
4961         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4962                                    " IPv4 address must be given as secondary",
4963                                    errors.ECODE_INVAL)
4964       self.op.secondary_ip = primary_ip
4965
4966     secondary_ip = self.op.secondary_ip
4967     if not netutils.IP4Address.IsValid(secondary_ip):
4968       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4969                                  " address" % secondary_ip, errors.ECODE_INVAL)
4970
4971     node_list = cfg.GetNodeList()
4972     if not self.op.readd and node in node_list:
4973       raise errors.OpPrereqError("Node %s is already in the configuration" %
4974                                  node, errors.ECODE_EXISTS)
4975     elif self.op.readd and node not in node_list:
4976       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4977                                  errors.ECODE_NOENT)
4978
4979     self.changed_primary_ip = False
4980
4981     for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4982       if self.op.readd and node == existing_node_name:
4983         if existing_node.secondary_ip != secondary_ip:
4984           raise errors.OpPrereqError("Readded node doesn't have the same IP"
4985                                      " address configuration as before",
4986                                      errors.ECODE_INVAL)
4987         if existing_node.primary_ip != primary_ip:
4988           self.changed_primary_ip = True
4989
4990         continue
4991
4992       if (existing_node.primary_ip == primary_ip or
4993           existing_node.secondary_ip == primary_ip or
4994           existing_node.primary_ip == secondary_ip or
4995           existing_node.secondary_ip == secondary_ip):
4996         raise errors.OpPrereqError("New node ip address(es) conflict with"
4997                                    " existing node %s" % existing_node.name,
4998                                    errors.ECODE_NOTUNIQUE)
4999
5000     # After this 'if' block, None is no longer a valid value for the
5001     # _capable op attributes
5002     if self.op.readd:
5003       old_node = self.cfg.GetNodeInfo(node)
5004       assert old_node is not None, "Can't retrieve locked node %s" % node
5005       for attr in self._NFLAGS:
5006         if getattr(self.op, attr) is None:
5007           setattr(self.op, attr, getattr(old_node, attr))
5008     else:
5009       for attr in self._NFLAGS:
5010         if getattr(self.op, attr) is None:
5011           setattr(self.op, attr, True)
5012
5013     if self.op.readd and not self.op.vm_capable:
5014       pri, sec = cfg.GetNodeInstances(node)
5015       if pri or sec:
5016         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5017                                    " flag set to false, but it already holds"
5018                                    " instances" % node,
5019                                    errors.ECODE_STATE)
5020
5021     # check that the type of the node (single versus dual homed) is the
5022     # same as for the master
5023     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5024     master_singlehomed = myself.secondary_ip == myself.primary_ip
5025     newbie_singlehomed = secondary_ip == primary_ip
5026     if master_singlehomed != newbie_singlehomed:
5027       if master_singlehomed:
5028         raise errors.OpPrereqError("The master has no secondary ip but the"
5029                                    " new node has one",
5030                                    errors.ECODE_INVAL)
5031       else:
5032         raise errors.OpPrereqError("The master has a secondary ip but the"
5033                                    " new node doesn't have one",
5034                                    errors.ECODE_INVAL)
5035
5036     # checks reachability
5037     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5038       raise errors.OpPrereqError("Node not reachable by ping",
5039                                  errors.ECODE_ENVIRON)
5040
5041     if not newbie_singlehomed:
5042       # check reachability from my secondary ip to newbie's secondary ip
5043       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5044                            source=myself.secondary_ip):
5045         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5046                                    " based ping to node daemon port",
5047                                    errors.ECODE_ENVIRON)
5048
5049     if self.op.readd:
5050       exceptions = [node]
5051     else:
5052       exceptions = []
5053
5054     if self.op.master_capable:
5055       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5056     else:
5057       self.master_candidate = False
5058
5059     if self.op.readd:
5060       self.new_node = old_node
5061     else:
5062       node_group = cfg.LookupNodeGroup(self.op.group)
5063       self.new_node = objects.Node(name=node,
5064                                    primary_ip=primary_ip,
5065                                    secondary_ip=secondary_ip,
5066                                    master_candidate=self.master_candidate,
5067                                    offline=False, drained=False,
5068                                    group=node_group)
5069
5070     if self.op.ndparams:
5071       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5072
5073   def Exec(self, feedback_fn):
5074     """Adds the new node to the cluster.
5075
5076     """
5077     new_node = self.new_node
5078     node = new_node.name
5079
5080     # We adding a new node so we assume it's powered
5081     new_node.powered = True
5082
5083     # for re-adds, reset the offline/drained/master-candidate flags;
5084     # we need to reset here, otherwise offline would prevent RPC calls
5085     # later in the procedure; this also means that if the re-add
5086     # fails, we are left with a non-offlined, broken node
5087     if self.op.readd:
5088       new_node.drained = new_node.offline = False # pylint: disable=W0201
5089       self.LogInfo("Readding a node, the offline/drained flags were reset")
5090       # if we demote the node, we do cleanup later in the procedure
5091       new_node.master_candidate = self.master_candidate
5092       if self.changed_primary_ip:
5093         new_node.primary_ip = self.op.primary_ip
5094
5095     # copy the master/vm_capable flags
5096     for attr in self._NFLAGS:
5097       setattr(new_node, attr, getattr(self.op, attr))
5098
5099     # notify the user about any possible mc promotion
5100     if new_node.master_candidate:
5101       self.LogInfo("Node will be a master candidate")
5102
5103     if self.op.ndparams:
5104       new_node.ndparams = self.op.ndparams
5105     else:
5106       new_node.ndparams = {}
5107
5108     # check connectivity
5109     result = self.rpc.call_version([node])[node]
5110     result.Raise("Can't get version information from node %s" % node)
5111     if constants.PROTOCOL_VERSION == result.payload:
5112       logging.info("Communication to node %s fine, sw version %s match",
5113                    node, result.payload)
5114     else:
5115       raise errors.OpExecError("Version mismatch master version %s,"
5116                                " node version %s" %
5117                                (constants.PROTOCOL_VERSION, result.payload))
5118
5119     # Add node to our /etc/hosts, and add key to known_hosts
5120     if self.cfg.GetClusterInfo().modify_etc_hosts:
5121       master_node = self.cfg.GetMasterNode()
5122       result = self.rpc.call_etc_hosts_modify(master_node,
5123                                               constants.ETC_HOSTS_ADD,
5124                                               self.hostname.name,
5125                                               self.hostname.ip)
5126       result.Raise("Can't update hosts file with new host data")
5127
5128     if new_node.secondary_ip != new_node.primary_ip:
5129       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5130                                False)
5131
5132     node_verify_list = [self.cfg.GetMasterNode()]
5133     node_verify_param = {
5134       constants.NV_NODELIST: ([node], {}),
5135       # TODO: do a node-net-test as well?
5136     }
5137
5138     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5139                                        self.cfg.GetClusterName())
5140     for verifier in node_verify_list:
5141       result[verifier].Raise("Cannot communicate with node %s" % verifier)
5142       nl_payload = result[verifier].payload[constants.NV_NODELIST]
5143       if nl_payload:
5144         for failed in nl_payload:
5145           feedback_fn("ssh/hostname verification failed"
5146                       " (checking from %s): %s" %
5147                       (verifier, nl_payload[failed]))
5148         raise errors.OpExecError("ssh/hostname verification failed")
5149
5150     if self.op.readd:
5151       _RedistributeAncillaryFiles(self)
5152       self.context.ReaddNode(new_node)
5153       # make sure we redistribute the config
5154       self.cfg.Update(new_node, feedback_fn)
5155       # and make sure the new node will not have old files around
5156       if not new_node.master_candidate:
5157         result = self.rpc.call_node_demote_from_mc(new_node.name)
5158         msg = result.fail_msg
5159         if msg:
5160           self.LogWarning("Node failed to demote itself from master"
5161                           " candidate status: %s" % msg)
5162     else:
5163       _RedistributeAncillaryFiles(self, additional_nodes=[node],
5164                                   additional_vm=self.op.vm_capable)
5165       self.context.AddNode(new_node, self.proc.GetECId())
5166
5167
5168 class LUNodeSetParams(LogicalUnit):
5169   """Modifies the parameters of a node.
5170
5171   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5172       to the node role (as _ROLE_*)
5173   @cvar _R2F: a dictionary from node role to tuples of flags
5174   @cvar _FLAGS: a list of attribute names corresponding to the flags
5175
5176   """
5177   HPATH = "node-modify"
5178   HTYPE = constants.HTYPE_NODE
5179   REQ_BGL = False
5180   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5181   _F2R = {
5182     (True, False, False): _ROLE_CANDIDATE,
5183     (False, True, False): _ROLE_DRAINED,
5184     (False, False, True): _ROLE_OFFLINE,
5185     (False, False, False): _ROLE_REGULAR,
5186     }
5187   _R2F = dict((v, k) for k, v in _F2R.items())
5188   _FLAGS = ["master_candidate", "drained", "offline"]
5189
5190   def CheckArguments(self):
5191     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5192     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5193                 self.op.master_capable, self.op.vm_capable,
5194                 self.op.secondary_ip, self.op.ndparams]
5195     if all_mods.count(None) == len(all_mods):
5196       raise errors.OpPrereqError("Please pass at least one modification",
5197                                  errors.ECODE_INVAL)
5198     if all_mods.count(True) > 1:
5199       raise errors.OpPrereqError("Can't set the node into more than one"
5200                                  " state at the same time",
5201                                  errors.ECODE_INVAL)
5202
5203     # Boolean value that tells us whether we might be demoting from MC
5204     self.might_demote = (self.op.master_candidate == False or
5205                          self.op.offline == True or
5206                          self.op.drained == True or
5207                          self.op.master_capable == False)
5208
5209     if self.op.secondary_ip:
5210       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5211         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5212                                    " address" % self.op.secondary_ip,
5213                                    errors.ECODE_INVAL)
5214
5215     self.lock_all = self.op.auto_promote and self.might_demote
5216     self.lock_instances = self.op.secondary_ip is not None
5217
5218   def ExpandNames(self):
5219     if self.lock_all:
5220       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5221     else:
5222       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5223
5224     if self.lock_instances:
5225       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5226
5227   def DeclareLocks(self, level):
5228     # If we have locked all instances, before waiting to lock nodes, release
5229     # all the ones living on nodes unrelated to the current operation.
5230     if level == locking.LEVEL_NODE and self.lock_instances:
5231       self.affected_instances = []
5232       if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5233         instances_keep = []
5234
5235         # Build list of instances to release
5236         locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5237         for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5238           if (instance.disk_template in constants.DTS_INT_MIRROR and
5239               self.op.node_name in instance.all_nodes):
5240             instances_keep.append(instance_name)
5241             self.affected_instances.append(instance)
5242
5243         _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5244
5245         assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5246                 set(instances_keep))
5247
5248   def BuildHooksEnv(self):
5249     """Build hooks env.
5250
5251     This runs on the master node.
5252
5253     """
5254     return {
5255       "OP_TARGET": self.op.node_name,
5256       "MASTER_CANDIDATE": str(self.op.master_candidate),
5257       "OFFLINE": str(self.op.offline),
5258       "DRAINED": str(self.op.drained),
5259       "MASTER_CAPABLE": str(self.op.master_capable),
5260       "VM_CAPABLE": str(self.op.vm_capable),
5261       }
5262
5263   def BuildHooksNodes(self):
5264     """Build hooks nodes.
5265
5266     """
5267     nl = [self.cfg.GetMasterNode(), self.op.node_name]
5268     return (nl, nl)
5269
5270   def CheckPrereq(self):
5271     """Check prerequisites.
5272
5273     This only checks the instance list against the existing names.
5274
5275     """
5276     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5277
5278     if (self.op.master_candidate is not None or
5279         self.op.drained is not None or
5280         self.op.offline is not None):
5281       # we can't change the master's node flags
5282       if self.op.node_name == self.cfg.GetMasterNode():
5283         raise errors.OpPrereqError("The master role can be changed"
5284                                    " only via master-failover",
5285                                    errors.ECODE_INVAL)
5286
5287     if self.op.master_candidate and not node.master_capable:
5288       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5289                                  " it a master candidate" % node.name,
5290                                  errors.ECODE_STATE)
5291
5292     if self.op.vm_capable == False:
5293       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5294       if ipri or isec:
5295         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5296                                    " the vm_capable flag" % node.name,
5297                                    errors.ECODE_STATE)
5298
5299     if node.master_candidate and self.might_demote and not self.lock_all:
5300       assert not self.op.auto_promote, "auto_promote set but lock_all not"
5301       # check if after removing the current node, we're missing master
5302       # candidates
5303       (mc_remaining, mc_should, _) = \
5304           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5305       if mc_remaining < mc_should:
5306         raise errors.OpPrereqError("Not enough master candidates, please"
5307                                    " pass auto promote option to allow"
5308                                    " promotion", errors.ECODE_STATE)
5309
5310     self.old_flags = old_flags = (node.master_candidate,
5311                                   node.drained, node.offline)
5312     assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5313     self.old_role = old_role = self._F2R[old_flags]
5314
5315     # Check for ineffective changes
5316     for attr in self._FLAGS:
5317       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5318         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5319         setattr(self.op, attr, None)
5320
5321     # Past this point, any flag change to False means a transition
5322     # away from the respective state, as only real changes are kept
5323
5324     # TODO: We might query the real power state if it supports OOB
5325     if _SupportsOob(self.cfg, node):
5326       if self.op.offline is False and not (node.powered or
5327                                            self.op.powered == True):
5328         raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5329                                     " offline status can be reset") %
5330                                    self.op.node_name)
5331     elif self.op.powered is not None:
5332       raise errors.OpPrereqError(("Unable to change powered state for node %s"
5333                                   " as it does not support out-of-band"
5334                                   " handling") % self.op.node_name)
5335
5336     # If we're being deofflined/drained, we'll MC ourself if needed
5337     if (self.op.drained == False or self.op.offline == False or
5338         (self.op.master_capable and not node.master_capable)):
5339       if _DecideSelfPromotion(self):
5340         self.op.master_candidate = True
5341         self.LogInfo("Auto-promoting node to master candidate")
5342
5343     # If we're no longer master capable, we'll demote ourselves from MC
5344     if self.op.master_capable == False and node.master_candidate:
5345       self.LogInfo("Demoting from master candidate")
5346       self.op.master_candidate = False
5347
5348     # Compute new role
5349     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5350     if self.op.master_candidate:
5351       new_role = self._ROLE_CANDIDATE
5352     elif self.op.drained:
5353       new_role = self._ROLE_DRAINED
5354     elif self.op.offline:
5355       new_role = self._ROLE_OFFLINE
5356     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5357       # False is still in new flags, which means we're un-setting (the
5358       # only) True flag
5359       new_role = self._ROLE_REGULAR
5360     else: # no new flags, nothing, keep old role
5361       new_role = old_role
5362
5363     self.new_role = new_role
5364
5365     if old_role == self._ROLE_OFFLINE and new_role != old_role:
5366       # Trying to transition out of offline status
5367       result = self.rpc.call_version([node.name])[node.name]
5368       if result.fail_msg:
5369         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5370                                    " to report its version: %s" %
5371                                    (node.name, result.fail_msg),
5372                                    errors.ECODE_STATE)
5373       else:
5374         self.LogWarning("Transitioning node from offline to online state"
5375                         " without using re-add. Please make sure the node"
5376                         " is healthy!")
5377
5378     if self.op.secondary_ip:
5379       # Ok even without locking, because this can't be changed by any LU
5380       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5381       master_singlehomed = master.secondary_ip == master.primary_ip
5382       if master_singlehomed and self.op.secondary_ip:
5383         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5384                                    " homed cluster", errors.ECODE_INVAL)
5385
5386       if node.offline:
5387         if self.affected_instances:
5388           raise errors.OpPrereqError("Cannot change secondary ip: offline"
5389                                      " node has instances (%s) configured"
5390                                      " to use it" % self.affected_instances)
5391       else:
5392         # On online nodes, check that no instances are running, and that
5393         # the node has the new ip and we can reach it.
5394         for instance in self.affected_instances:
5395           _CheckInstanceDown(self, instance, "cannot change secondary ip")
5396
5397         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5398         if master.name != node.name:
5399           # check reachability from master secondary ip to new secondary ip
5400           if not netutils.TcpPing(self.op.secondary_ip,
5401                                   constants.DEFAULT_NODED_PORT,
5402                                   source=master.secondary_ip):
5403             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5404                                        " based ping to node daemon port",
5405                                        errors.ECODE_ENVIRON)
5406
5407     if self.op.ndparams:
5408       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5409       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5410       self.new_ndparams = new_ndparams
5411
5412   def Exec(self, feedback_fn):
5413     """Modifies a node.
5414
5415     """
5416     node = self.node
5417     old_role = self.old_role
5418     new_role = self.new_role
5419
5420     result = []
5421
5422     if self.op.ndparams:
5423       node.ndparams = self.new_ndparams
5424
5425     if self.op.powered is not None:
5426       node.powered = self.op.powered
5427
5428     for attr in ["master_capable", "vm_capable"]:
5429       val = getattr(self.op, attr)
5430       if val is not None:
5431         setattr(node, attr, val)
5432         result.append((attr, str(val)))
5433
5434     if new_role != old_role:
5435       # Tell the node to demote itself, if no longer MC and not offline
5436       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5437         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5438         if msg:
5439           self.LogWarning("Node failed to demote itself: %s", msg)
5440
5441       new_flags = self._R2F[new_role]
5442       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5443         if of != nf:
5444           result.append((desc, str(nf)))
5445       (node.master_candidate, node.drained, node.offline) = new_flags
5446
5447       # we locked all nodes, we adjust the CP before updating this node
5448       if self.lock_all:
5449         _AdjustCandidatePool(self, [node.name])
5450
5451     if self.op.secondary_ip:
5452       node.secondary_ip = self.op.secondary_ip
5453       result.append(("secondary_ip", self.op.secondary_ip))
5454
5455     # this will trigger configuration file update, if needed
5456     self.cfg.Update(node, feedback_fn)
5457
5458     # this will trigger job queue propagation or cleanup if the mc
5459     # flag changed
5460     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5461       self.context.ReaddNode(node)
5462
5463     return result
5464
5465
5466 class LUNodePowercycle(NoHooksLU):
5467   """Powercycles a node.
5468
5469   """
5470   REQ_BGL = False
5471
5472   def CheckArguments(self):
5473     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5474     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5475       raise errors.OpPrereqError("The node is the master and the force"
5476                                  " parameter was not set",
5477                                  errors.ECODE_INVAL)
5478
5479   def ExpandNames(self):
5480     """Locking for PowercycleNode.
5481
5482     This is a last-resort option and shouldn't block on other
5483     jobs. Therefore, we grab no locks.
5484
5485     """
5486     self.needed_locks = {}
5487
5488   def Exec(self, feedback_fn):
5489     """Reboots a node.
5490
5491     """
5492     result = self.rpc.call_node_powercycle(self.op.node_name,
5493                                            self.cfg.GetHypervisorType())
5494     result.Raise("Failed to schedule the reboot")
5495     return result.payload
5496
5497
5498 class LUClusterQuery(NoHooksLU):
5499   """Query cluster configuration.
5500
5501   """
5502   REQ_BGL = False
5503
5504   def ExpandNames(self):
5505     self.needed_locks = {}
5506
5507   def Exec(self, feedback_fn):
5508     """Return cluster config.
5509
5510     """
5511     cluster = self.cfg.GetClusterInfo()
5512     os_hvp = {}
5513
5514     # Filter just for enabled hypervisors
5515     for os_name, hv_dict in cluster.os_hvp.items():
5516       os_hvp[os_name] = {}
5517       for hv_name, hv_params in hv_dict.items():
5518         if hv_name in cluster.enabled_hypervisors:
5519           os_hvp[os_name][hv_name] = hv_params
5520
5521     # Convert ip_family to ip_version
5522     primary_ip_version = constants.IP4_VERSION
5523     if cluster.primary_ip_family == netutils.IP6Address.family:
5524       primary_ip_version = constants.IP6_VERSION
5525
5526     result = {
5527       "software_version": constants.RELEASE_VERSION,
5528       "protocol_version": constants.PROTOCOL_VERSION,
5529       "config_version": constants.CONFIG_VERSION,
5530       "os_api_version": max(constants.OS_API_VERSIONS),
5531       "export_version": constants.EXPORT_VERSION,
5532       "architecture": (platform.architecture()[0], platform.machine()),
5533       "name": cluster.cluster_name,
5534       "master": cluster.master_node,
5535       "default_hypervisor": cluster.enabled_hypervisors[0],
5536       "enabled_hypervisors": cluster.enabled_hypervisors,
5537       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5538                         for hypervisor_name in cluster.enabled_hypervisors]),
5539       "os_hvp": os_hvp,
5540       "beparams": cluster.beparams,
5541       "osparams": cluster.osparams,
5542       "nicparams": cluster.nicparams,
5543       "ndparams": cluster.ndparams,
5544       "candidate_pool_size": cluster.candidate_pool_size,
5545       "master_netdev": cluster.master_netdev,
5546       "master_netmask": cluster.master_netmask,
5547       "volume_group_name": cluster.volume_group_name,
5548       "drbd_usermode_helper": cluster.drbd_usermode_helper,
5549       "file_storage_dir": cluster.file_storage_dir,
5550       "shared_file_storage_dir": cluster.shared_file_storage_dir,
5551       "maintain_node_health": cluster.maintain_node_health,
5552       "ctime": cluster.ctime,
5553       "mtime": cluster.mtime,
5554       "uuid": cluster.uuid,
5555       "tags": list(cluster.GetTags()),
5556       "uid_pool": cluster.uid_pool,
5557       "default_iallocator": cluster.default_iallocator,
5558       "reserved_lvs": cluster.reserved_lvs,
5559       "primary_ip_version": primary_ip_version,
5560       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5561       "hidden_os": cluster.hidden_os,
5562       "blacklisted_os": cluster.blacklisted_os,
5563       }
5564
5565     return result
5566
5567
5568 class LUClusterConfigQuery(NoHooksLU):
5569   """Return configuration values.
5570
5571   """
5572   REQ_BGL = False
5573   _FIELDS_DYNAMIC = utils.FieldSet()
5574   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5575                                   "watcher_pause", "volume_group_name")
5576
5577   def CheckArguments(self):
5578     _CheckOutputFields(static=self._FIELDS_STATIC,
5579                        dynamic=self._FIELDS_DYNAMIC,
5580                        selected=self.op.output_fields)
5581
5582   def ExpandNames(self):
5583     self.needed_locks = {}
5584
5585   def Exec(self, feedback_fn):
5586     """Dump a representation of the cluster config to the standard output.
5587
5588     """
5589     values = []
5590     for field in self.op.output_fields:
5591       if field == "cluster_name":
5592         entry = self.cfg.GetClusterName()
5593       elif field == "master_node":
5594         entry = self.cfg.GetMasterNode()
5595       elif field == "drain_flag":
5596         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5597       elif field == "watcher_pause":
5598         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5599       elif field == "volume_group_name":
5600         entry = self.cfg.GetVGName()
5601       else:
5602         raise errors.ParameterError(field)
5603       values.append(entry)
5604     return values
5605
5606
5607 class LUInstanceActivateDisks(NoHooksLU):
5608   """Bring up an instance's disks.
5609
5610   """
5611   REQ_BGL = False
5612
5613   def ExpandNames(self):
5614     self._ExpandAndLockInstance()
5615     self.needed_locks[locking.LEVEL_NODE] = []
5616     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5617
5618   def DeclareLocks(self, level):
5619     if level == locking.LEVEL_NODE:
5620       self._LockInstancesNodes()
5621
5622   def CheckPrereq(self):
5623     """Check prerequisites.
5624
5625     This checks that the instance is in the cluster.
5626
5627     """
5628     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5629     assert self.instance is not None, \
5630       "Cannot retrieve locked instance %s" % self.op.instance_name
5631     _CheckNodeOnline(self, self.instance.primary_node)
5632
5633   def Exec(self, feedback_fn):
5634     """Activate the disks.
5635
5636     """
5637     disks_ok, disks_info = \
5638               _AssembleInstanceDisks(self, self.instance,
5639                                      ignore_size=self.op.ignore_size)
5640     if not disks_ok:
5641       raise errors.OpExecError("Cannot activate block devices")
5642
5643     return disks_info
5644
5645
5646 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5647                            ignore_size=False):
5648   """Prepare the block devices for an instance.
5649
5650   This sets up the block devices on all nodes.
5651
5652   @type lu: L{LogicalUnit}
5653   @param lu: the logical unit on whose behalf we execute
5654   @type instance: L{objects.Instance}
5655   @param instance: the instance for whose disks we assemble
5656   @type disks: list of L{objects.Disk} or None
5657   @param disks: which disks to assemble (or all, if None)
5658   @type ignore_secondaries: boolean
5659   @param ignore_secondaries: if true, errors on secondary nodes
5660       won't result in an error return from the function
5661   @type ignore_size: boolean
5662   @param ignore_size: if true, the current known size of the disk
5663       will not be used during the disk activation, useful for cases
5664       when the size is wrong
5665   @return: False if the operation failed, otherwise a list of
5666       (host, instance_visible_name, node_visible_name)
5667       with the mapping from node devices to instance devices
5668
5669   """
5670   device_info = []
5671   disks_ok = True
5672   iname = instance.name
5673   disks = _ExpandCheckDisks(instance, disks)
5674
5675   # With the two passes mechanism we try to reduce the window of
5676   # opportunity for the race condition of switching DRBD to primary
5677   # before handshaking occured, but we do not eliminate it
5678
5679   # The proper fix would be to wait (with some limits) until the
5680   # connection has been made and drbd transitions from WFConnection
5681   # into any other network-connected state (Connected, SyncTarget,
5682   # SyncSource, etc.)
5683
5684   # 1st pass, assemble on all nodes in secondary mode
5685   for idx, inst_disk in enumerate(disks):
5686     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5687       if ignore_size:
5688         node_disk = node_disk.Copy()
5689         node_disk.UnsetSize()
5690       lu.cfg.SetDiskID(node_disk, node)
5691       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5692       msg = result.fail_msg
5693       if msg:
5694         lu.proc.LogWarning("Could not prepare block device %s on node %s"
5695                            " (is_primary=False, pass=1): %s",
5696                            inst_disk.iv_name, node, msg)
5697         if not ignore_secondaries:
5698           disks_ok = False
5699
5700   # FIXME: race condition on drbd migration to primary
5701
5702   # 2nd pass, do only the primary node
5703   for idx, inst_disk in enumerate(disks):
5704     dev_path = None
5705
5706     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5707       if node != instance.primary_node:
5708         continue
5709       if ignore_size:
5710         node_disk = node_disk.Copy()
5711         node_disk.UnsetSize()
5712       lu.cfg.SetDiskID(node_disk, node)
5713       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5714       msg = result.fail_msg
5715       if msg:
5716         lu.proc.LogWarning("Could not prepare block device %s on node %s"
5717                            " (is_primary=True, pass=2): %s",
5718                            inst_disk.iv_name, node, msg)
5719         disks_ok = False
5720       else:
5721         dev_path = result.payload
5722
5723     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5724
5725   # leave the disks configured for the primary node
5726   # this is a workaround that would be fixed better by
5727   # improving the logical/physical id handling
5728   for disk in disks:
5729     lu.cfg.SetDiskID(disk, instance.primary_node)
5730
5731   return disks_ok, device_info
5732
5733
5734 def _StartInstanceDisks(lu, instance, force):
5735   """Start the disks of an instance.
5736
5737   """
5738   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5739                                            ignore_secondaries=force)
5740   if not disks_ok:
5741     _ShutdownInstanceDisks(lu, instance)
5742     if force is not None and not force:
5743       lu.proc.LogWarning("", hint="If the message above refers to a"
5744                          " secondary node,"
5745                          " you can retry the operation using '--force'.")
5746     raise errors.OpExecError("Disk consistency error")
5747
5748
5749 class LUInstanceDeactivateDisks(NoHooksLU):
5750   """Shutdown an instance's disks.
5751
5752   """
5753   REQ_BGL = False
5754
5755   def ExpandNames(self):
5756     self._ExpandAndLockInstance()
5757     self.needed_locks[locking.LEVEL_NODE] = []
5758     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5759
5760   def DeclareLocks(self, level):
5761     if level == locking.LEVEL_NODE:
5762       self._LockInstancesNodes()
5763
5764   def CheckPrereq(self):
5765     """Check prerequisites.
5766
5767     This checks that the instance is in the cluster.
5768
5769     """
5770     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5771     assert self.instance is not None, \
5772       "Cannot retrieve locked instance %s" % self.op.instance_name
5773
5774   def Exec(self, feedback_fn):
5775     """Deactivate the disks
5776
5777     """
5778     instance = self.instance
5779     if self.op.force:
5780       _ShutdownInstanceDisks(self, instance)
5781     else:
5782       _SafeShutdownInstanceDisks(self, instance)
5783
5784
5785 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5786   """Shutdown block devices of an instance.
5787
5788   This function checks if an instance is running, before calling
5789   _ShutdownInstanceDisks.
5790
5791   """
5792   _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5793   _ShutdownInstanceDisks(lu, instance, disks=disks)
5794
5795
5796 def _ExpandCheckDisks(instance, disks):
5797   """Return the instance disks selected by the disks list
5798
5799   @type disks: list of L{objects.Disk} or None
5800   @param disks: selected disks
5801   @rtype: list of L{objects.Disk}
5802   @return: selected instance disks to act on
5803
5804   """
5805   if disks is None:
5806     return instance.disks
5807   else:
5808     if not set(disks).issubset(instance.disks):
5809       raise errors.ProgrammerError("Can only act on disks belonging to the"
5810                                    " target instance")
5811     return disks
5812
5813
5814 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5815   """Shutdown block devices of an instance.
5816
5817   This does the shutdown on all nodes of the instance.
5818
5819   If the ignore_primary is false, errors on the primary node are
5820   ignored.
5821
5822   """
5823   all_result = True
5824   disks = _ExpandCheckDisks(instance, disks)
5825
5826   for disk in disks:
5827     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5828       lu.cfg.SetDiskID(top_disk, node)
5829       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5830       msg = result.fail_msg
5831       if msg:
5832         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5833                       disk.iv_name, node, msg)
5834         if ((node == instance.primary_node and not ignore_primary) or
5835             (node != instance.primary_node and not result.offline)):
5836           all_result = False
5837   return all_result
5838
5839
5840 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5841   """Checks if a node has enough free memory.
5842
5843   This function check if a given node has the needed amount of free
5844   memory. In case the node has less memory or we cannot get the
5845   information from the node, this function raise an OpPrereqError
5846   exception.
5847
5848   @type lu: C{LogicalUnit}
5849   @param lu: a logical unit from which we get configuration data
5850   @type node: C{str}
5851   @param node: the node to check
5852   @type reason: C{str}
5853   @param reason: string to use in the error message
5854   @type requested: C{int}
5855   @param requested: the amount of memory in MiB to check for
5856   @type hypervisor_name: C{str}
5857   @param hypervisor_name: the hypervisor to ask for memory stats
5858   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5859       we cannot check the node
5860
5861   """
5862   nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5863   nodeinfo[node].Raise("Can't get data from node %s" % node,
5864                        prereq=True, ecode=errors.ECODE_ENVIRON)
5865   free_mem = nodeinfo[node].payload.get("memory_free", None)
5866   if not isinstance(free_mem, int):
5867     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5868                                " was '%s'" % (node, free_mem),
5869                                errors.ECODE_ENVIRON)
5870   if requested > free_mem:
5871     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5872                                " needed %s MiB, available %s MiB" %
5873                                (node, reason, requested, free_mem),
5874                                errors.ECODE_NORES)
5875
5876
5877 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5878   """Checks if nodes have enough free disk space in the all VGs.
5879
5880   This function check if all given nodes have the needed amount of
5881   free disk. In case any node has less disk or we cannot get the
5882   information from the node, this function raise an OpPrereqError
5883   exception.
5884
5885   @type lu: C{LogicalUnit}
5886   @param lu: a logical unit from which we get configuration data
5887   @type nodenames: C{list}
5888   @param nodenames: the list of node names to check
5889   @type req_sizes: C{dict}
5890   @param req_sizes: the hash of vg and corresponding amount of disk in
5891       MiB to check for
5892   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5893       or we cannot check the node
5894
5895   """
5896   for vg, req_size in req_sizes.items():
5897     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5898
5899
5900 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5901   """Checks if nodes have enough free disk space in the specified VG.
5902
5903   This function check if all given nodes have the needed amount of
5904   free disk. In case any node has less disk or we cannot get the
5905   information from the node, this function raise an OpPrereqError
5906   exception.
5907
5908   @type lu: C{LogicalUnit}
5909   @param lu: a logical unit from which we get configuration data
5910   @type nodenames: C{list}
5911   @param nodenames: the list of node names to check
5912   @type vg: C{str}
5913   @param vg: the volume group to check
5914   @type requested: C{int}
5915   @param requested: the amount of disk in MiB to check for
5916   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5917       or we cannot check the node
5918
5919   """
5920   nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5921   for node in nodenames:
5922     info = nodeinfo[node]
5923     info.Raise("Cannot get current information from node %s" % node,
5924                prereq=True, ecode=errors.ECODE_ENVIRON)
5925     vg_free = info.payload.get("vg_free", None)
5926     if not isinstance(vg_free, int):
5927       raise errors.OpPrereqError("Can't compute free disk space on node"
5928                                  " %s for vg %s, result was '%s'" %
5929                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
5930     if requested > vg_free:
5931       raise errors.OpPrereqError("Not enough disk space on target node %s"
5932                                  " vg %s: required %d MiB, available %d MiB" %
5933                                  (node, vg, requested, vg_free),
5934                                  errors.ECODE_NORES)
5935
5936
5937 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
5938   """Checks if nodes have enough physical CPUs
5939
5940   This function checks if all given nodes have the needed number of
5941   physical CPUs. In case any node has less CPUs or we cannot get the
5942   information from the node, this function raises an OpPrereqError
5943   exception.
5944
5945   @type lu: C{LogicalUnit}
5946   @param lu: a logical unit from which we get configuration data
5947   @type nodenames: C{list}
5948   @param nodenames: the list of node names to check
5949   @type requested: C{int}
5950   @param requested: the minimum acceptable number of physical CPUs
5951   @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
5952       or we cannot check the node
5953
5954   """
5955   nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
5956   for node in nodenames:
5957     info = nodeinfo[node]
5958     info.Raise("Cannot get current information from node %s" % node,
5959                prereq=True, ecode=errors.ECODE_ENVIRON)
5960     num_cpus = info.payload.get("cpu_total", None)
5961     if not isinstance(num_cpus, int):
5962       raise errors.OpPrereqError("Can't compute the number of physical CPUs"
5963                                  " on node %s, result was '%s'" %
5964                                  (node, num_cpus), errors.ECODE_ENVIRON)
5965     if requested > num_cpus:
5966       raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
5967                                  "required" % (node, num_cpus, requested),
5968                                  errors.ECODE_NORES)
5969
5970
5971 class LUInstanceStartup(LogicalUnit):
5972   """Starts an instance.
5973
5974   """
5975   HPATH = "instance-start"
5976   HTYPE = constants.HTYPE_INSTANCE
5977   REQ_BGL = False
5978
5979   def CheckArguments(self):
5980     # extra beparams
5981     if self.op.beparams:
5982       # fill the beparams dict
5983       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5984
5985   def ExpandNames(self):
5986     self._ExpandAndLockInstance()
5987
5988   def BuildHooksEnv(self):
5989     """Build hooks env.
5990
5991     This runs on master, primary and secondary nodes of the instance.
5992
5993     """
5994     env = {
5995       "FORCE": self.op.force,
5996       }
5997
5998     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5999
6000     return env
6001
6002   def BuildHooksNodes(self):
6003     """Build hooks nodes.
6004
6005     """
6006     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6007     return (nl, nl)
6008
6009   def CheckPrereq(self):
6010     """Check prerequisites.
6011
6012     This checks that the instance is in the cluster.
6013
6014     """
6015     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6016     assert self.instance is not None, \
6017       "Cannot retrieve locked instance %s" % self.op.instance_name
6018
6019     # extra hvparams
6020     if self.op.hvparams:
6021       # check hypervisor parameter syntax (locally)
6022       cluster = self.cfg.GetClusterInfo()
6023       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6024       filled_hvp = cluster.FillHV(instance)
6025       filled_hvp.update(self.op.hvparams)
6026       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6027       hv_type.CheckParameterSyntax(filled_hvp)
6028       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6029
6030     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6031
6032     if self.primary_offline and self.op.ignore_offline_nodes:
6033       self.proc.LogWarning("Ignoring offline primary node")
6034
6035       if self.op.hvparams or self.op.beparams:
6036         self.proc.LogWarning("Overridden parameters are ignored")
6037     else:
6038       _CheckNodeOnline(self, instance.primary_node)
6039
6040       bep = self.cfg.GetClusterInfo().FillBE(instance)
6041
6042       # check bridges existence
6043       _CheckInstanceBridgesExist(self, instance)
6044
6045       remote_info = self.rpc.call_instance_info(instance.primary_node,
6046                                                 instance.name,
6047                                                 instance.hypervisor)
6048       remote_info.Raise("Error checking node %s" % instance.primary_node,
6049                         prereq=True, ecode=errors.ECODE_ENVIRON)
6050       if not remote_info.payload: # not running already
6051         _CheckNodeFreeMemory(self, instance.primary_node,
6052                              "starting instance %s" % instance.name,
6053                              bep[constants.BE_MEMORY], instance.hypervisor)
6054
6055   def Exec(self, feedback_fn):
6056     """Start the instance.
6057
6058     """
6059     instance = self.instance
6060     force = self.op.force
6061
6062     if not self.op.no_remember:
6063       self.cfg.MarkInstanceUp(instance.name)
6064
6065     if self.primary_offline:
6066       assert self.op.ignore_offline_nodes
6067       self.proc.LogInfo("Primary node offline, marked instance as started")
6068     else:
6069       node_current = instance.primary_node
6070
6071       _StartInstanceDisks(self, instance, force)
6072
6073       result = self.rpc.call_instance_start(node_current, instance,
6074                                             self.op.hvparams, self.op.beparams,
6075                                             self.op.startup_paused)
6076       msg = result.fail_msg
6077       if msg:
6078         _ShutdownInstanceDisks(self, instance)
6079         raise errors.OpExecError("Could not start instance: %s" % msg)
6080
6081
6082 class LUInstanceReboot(LogicalUnit):
6083   """Reboot an instance.
6084
6085   """
6086   HPATH = "instance-reboot"
6087   HTYPE = constants.HTYPE_INSTANCE
6088   REQ_BGL = False
6089
6090   def ExpandNames(self):
6091     self._ExpandAndLockInstance()
6092
6093   def BuildHooksEnv(self):
6094     """Build hooks env.
6095
6096     This runs on master, primary and secondary nodes of the instance.
6097
6098     """
6099     env = {
6100       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6101       "REBOOT_TYPE": self.op.reboot_type,
6102       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6103       }
6104
6105     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6106
6107     return env
6108
6109   def BuildHooksNodes(self):
6110     """Build hooks nodes.
6111
6112     """
6113     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6114     return (nl, nl)
6115
6116   def CheckPrereq(self):
6117     """Check prerequisites.
6118
6119     This checks that the instance is in the cluster.
6120
6121     """
6122     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6123     assert self.instance is not None, \
6124       "Cannot retrieve locked instance %s" % self.op.instance_name
6125
6126     _CheckNodeOnline(self, instance.primary_node)
6127
6128     # check bridges existence
6129     _CheckInstanceBridgesExist(self, instance)
6130
6131   def Exec(self, feedback_fn):
6132     """Reboot the instance.
6133
6134     """
6135     instance = self.instance
6136     ignore_secondaries = self.op.ignore_secondaries
6137     reboot_type = self.op.reboot_type
6138
6139     remote_info = self.rpc.call_instance_info(instance.primary_node,
6140                                               instance.name,
6141                                               instance.hypervisor)
6142     remote_info.Raise("Error checking node %s" % instance.primary_node)
6143     instance_running = bool(remote_info.payload)
6144
6145     node_current = instance.primary_node
6146
6147     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6148                                             constants.INSTANCE_REBOOT_HARD]:
6149       for disk in instance.disks:
6150         self.cfg.SetDiskID(disk, node_current)
6151       result = self.rpc.call_instance_reboot(node_current, instance,
6152                                              reboot_type,
6153                                              self.op.shutdown_timeout)
6154       result.Raise("Could not reboot instance")
6155     else:
6156       if instance_running:
6157         result = self.rpc.call_instance_shutdown(node_current, instance,
6158                                                  self.op.shutdown_timeout)
6159         result.Raise("Could not shutdown instance for full reboot")
6160         _ShutdownInstanceDisks(self, instance)
6161       else:
6162         self.LogInfo("Instance %s was already stopped, starting now",
6163                      instance.name)
6164       _StartInstanceDisks(self, instance, ignore_secondaries)
6165       result = self.rpc.call_instance_start(node_current, instance,
6166                                             None, None, False)
6167       msg = result.fail_msg
6168       if msg:
6169         _ShutdownInstanceDisks(self, instance)
6170         raise errors.OpExecError("Could not start instance for"
6171                                  " full reboot: %s" % msg)
6172
6173     self.cfg.MarkInstanceUp(instance.name)
6174
6175
6176 class LUInstanceShutdown(LogicalUnit):
6177   """Shutdown an instance.
6178
6179   """
6180   HPATH = "instance-stop"
6181   HTYPE = constants.HTYPE_INSTANCE
6182   REQ_BGL = False
6183
6184   def ExpandNames(self):
6185     self._ExpandAndLockInstance()
6186
6187   def BuildHooksEnv(self):
6188     """Build hooks env.
6189
6190     This runs on master, primary and secondary nodes of the instance.
6191
6192     """
6193     env = _BuildInstanceHookEnvByObject(self, self.instance)
6194     env["TIMEOUT"] = self.op.timeout
6195     return env
6196
6197   def BuildHooksNodes(self):
6198     """Build hooks nodes.
6199
6200     """
6201     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6202     return (nl, nl)
6203
6204   def CheckPrereq(self):
6205     """Check prerequisites.
6206
6207     This checks that the instance is in the cluster.
6208
6209     """
6210     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6211     assert self.instance is not None, \
6212       "Cannot retrieve locked instance %s" % self.op.instance_name
6213
6214     self.primary_offline = \
6215       self.cfg.GetNodeInfo(self.instance.primary_node).offline
6216
6217     if self.primary_offline and self.op.ignore_offline_nodes:
6218       self.proc.LogWarning("Ignoring offline primary node")
6219     else:
6220       _CheckNodeOnline(self, self.instance.primary_node)
6221
6222   def Exec(self, feedback_fn):
6223     """Shutdown the instance.
6224
6225     """
6226     instance = self.instance
6227     node_current = instance.primary_node
6228     timeout = self.op.timeout
6229
6230     if not self.op.no_remember:
6231       self.cfg.MarkInstanceDown(instance.name)
6232
6233     if self.primary_offline:
6234       assert self.op.ignore_offline_nodes
6235       self.proc.LogInfo("Primary node offline, marked instance as stopped")
6236     else:
6237       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6238       msg = result.fail_msg
6239       if msg:
6240         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6241
6242       _ShutdownInstanceDisks(self, instance)
6243
6244
6245 class LUInstanceReinstall(LogicalUnit):
6246   """Reinstall an instance.
6247
6248   """
6249   HPATH = "instance-reinstall"
6250   HTYPE = constants.HTYPE_INSTANCE
6251   REQ_BGL = False
6252
6253   def ExpandNames(self):
6254     self._ExpandAndLockInstance()
6255
6256   def BuildHooksEnv(self):
6257     """Build hooks env.
6258
6259     This runs on master, primary and secondary nodes of the instance.
6260
6261     """
6262     return _BuildInstanceHookEnvByObject(self, self.instance)
6263
6264   def BuildHooksNodes(self):
6265     """Build hooks nodes.
6266
6267     """
6268     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6269     return (nl, nl)
6270
6271   def CheckPrereq(self):
6272     """Check prerequisites.
6273
6274     This checks that the instance is in the cluster and is not running.
6275
6276     """
6277     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6278     assert instance is not None, \
6279       "Cannot retrieve locked instance %s" % self.op.instance_name
6280     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6281                      " offline, cannot reinstall")
6282     for node in instance.secondary_nodes:
6283       _CheckNodeOnline(self, node, "Instance secondary node offline,"
6284                        " cannot reinstall")
6285
6286     if instance.disk_template == constants.DT_DISKLESS:
6287       raise errors.OpPrereqError("Instance '%s' has no disks" %
6288                                  self.op.instance_name,
6289                                  errors.ECODE_INVAL)
6290     _CheckInstanceDown(self, instance, "cannot reinstall")
6291
6292     if self.op.os_type is not None:
6293       # OS verification
6294       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6295       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6296       instance_os = self.op.os_type
6297     else:
6298       instance_os = instance.os
6299
6300     nodelist = list(instance.all_nodes)
6301
6302     if self.op.osparams:
6303       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6304       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6305       self.os_inst = i_osdict # the new dict (without defaults)
6306     else:
6307       self.os_inst = None
6308
6309     self.instance = instance
6310
6311   def Exec(self, feedback_fn):
6312     """Reinstall the instance.
6313
6314     """
6315     inst = self.instance
6316
6317     if self.op.os_type is not None:
6318       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6319       inst.os = self.op.os_type
6320       # Write to configuration
6321       self.cfg.Update(inst, feedback_fn)
6322
6323     _StartInstanceDisks(self, inst, None)
6324     try:
6325       feedback_fn("Running the instance OS create scripts...")
6326       # FIXME: pass debug option from opcode to backend
6327       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6328                                              self.op.debug_level,
6329                                              osparams=self.os_inst)
6330       result.Raise("Could not install OS for instance %s on node %s" %
6331                    (inst.name, inst.primary_node))
6332     finally:
6333       _ShutdownInstanceDisks(self, inst)
6334
6335
6336 class LUInstanceRecreateDisks(LogicalUnit):
6337   """Recreate an instance's missing disks.
6338
6339   """
6340   HPATH = "instance-recreate-disks"
6341   HTYPE = constants.HTYPE_INSTANCE
6342   REQ_BGL = False
6343
6344   def CheckArguments(self):
6345     # normalise the disk list
6346     self.op.disks = sorted(frozenset(self.op.disks))
6347
6348   def ExpandNames(self):
6349     self._ExpandAndLockInstance()
6350     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6351     if self.op.nodes:
6352       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6353       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6354     else:
6355       self.needed_locks[locking.LEVEL_NODE] = []
6356
6357   def DeclareLocks(self, level):
6358     if level == locking.LEVEL_NODE:
6359       # if we replace the nodes, we only need to lock the old primary,
6360       # otherwise we need to lock all nodes for disk re-creation
6361       primary_only = bool(self.op.nodes)
6362       self._LockInstancesNodes(primary_only=primary_only)
6363
6364   def BuildHooksEnv(self):
6365     """Build hooks env.
6366
6367     This runs on master, primary and secondary nodes of the instance.
6368
6369     """
6370     return _BuildInstanceHookEnvByObject(self, self.instance)
6371
6372   def BuildHooksNodes(self):
6373     """Build hooks nodes.
6374
6375     """
6376     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6377     return (nl, nl)
6378
6379   def CheckPrereq(self):
6380     """Check prerequisites.
6381
6382     This checks that the instance is in the cluster and is not running.
6383
6384     """
6385     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6386     assert instance is not None, \
6387       "Cannot retrieve locked instance %s" % self.op.instance_name
6388     if self.op.nodes:
6389       if len(self.op.nodes) != len(instance.all_nodes):
6390         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6391                                    " %d replacement nodes were specified" %
6392                                    (instance.name, len(instance.all_nodes),
6393                                     len(self.op.nodes)),
6394                                    errors.ECODE_INVAL)
6395       assert instance.disk_template != constants.DT_DRBD8 or \
6396           len(self.op.nodes) == 2
6397       assert instance.disk_template != constants.DT_PLAIN or \
6398           len(self.op.nodes) == 1
6399       primary_node = self.op.nodes[0]
6400     else:
6401       primary_node = instance.primary_node
6402     _CheckNodeOnline(self, primary_node)
6403
6404     if instance.disk_template == constants.DT_DISKLESS:
6405       raise errors.OpPrereqError("Instance '%s' has no disks" %
6406                                  self.op.instance_name, errors.ECODE_INVAL)
6407     # if we replace nodes *and* the old primary is offline, we don't
6408     # check
6409     assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6410     old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6411     if not (self.op.nodes and old_pnode.offline):
6412       _CheckInstanceDown(self, instance, "cannot recreate disks")
6413
6414     if not self.op.disks:
6415       self.op.disks = range(len(instance.disks))
6416     else:
6417       for idx in self.op.disks:
6418         if idx >= len(instance.disks):
6419           raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6420                                      errors.ECODE_INVAL)
6421     if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6422       raise errors.OpPrereqError("Can't recreate disks partially and"
6423                                  " change the nodes at the same time",
6424                                  errors.ECODE_INVAL)
6425     self.instance = instance
6426
6427   def Exec(self, feedback_fn):
6428     """Recreate the disks.
6429
6430     """
6431     instance = self.instance
6432
6433     to_skip = []
6434     mods = [] # keeps track of needed logical_id changes
6435
6436     for idx, disk in enumerate(instance.disks):
6437       if idx not in self.op.disks: # disk idx has not been passed in
6438         to_skip.append(idx)
6439         continue
6440       # update secondaries for disks, if needed
6441       if self.op.nodes:
6442         if disk.dev_type == constants.LD_DRBD8:
6443           # need to update the nodes and minors
6444           assert len(self.op.nodes) == 2
6445           assert len(disk.logical_id) == 6 # otherwise disk internals
6446                                            # have changed
6447           (_, _, old_port, _, _, old_secret) = disk.logical_id
6448           new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6449           new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6450                     new_minors[0], new_minors[1], old_secret)
6451           assert len(disk.logical_id) == len(new_id)
6452           mods.append((idx, new_id))
6453
6454     # now that we have passed all asserts above, we can apply the mods
6455     # in a single run (to avoid partial changes)
6456     for idx, new_id in mods:
6457       instance.disks[idx].logical_id = new_id
6458
6459     # change primary node, if needed
6460     if self.op.nodes:
6461       instance.primary_node = self.op.nodes[0]
6462       self.LogWarning("Changing the instance's nodes, you will have to"
6463                       " remove any disks left on the older nodes manually")
6464
6465     if self.op.nodes:
6466       self.cfg.Update(instance, feedback_fn)
6467
6468     _CreateDisks(self, instance, to_skip=to_skip)
6469
6470
6471 class LUInstanceRename(LogicalUnit):
6472   """Rename an instance.
6473
6474   """
6475   HPATH = "instance-rename"
6476   HTYPE = constants.HTYPE_INSTANCE
6477
6478   def CheckArguments(self):
6479     """Check arguments.
6480
6481     """
6482     if self.op.ip_check and not self.op.name_check:
6483       # TODO: make the ip check more flexible and not depend on the name check
6484       raise errors.OpPrereqError("IP address check requires a name check",
6485                                  errors.ECODE_INVAL)
6486
6487   def BuildHooksEnv(self):
6488     """Build hooks env.
6489
6490     This runs on master, primary and secondary nodes of the instance.
6491
6492     """
6493     env = _BuildInstanceHookEnvByObject(self, self.instance)
6494     env["INSTANCE_NEW_NAME"] = self.op.new_name
6495     return env
6496
6497   def BuildHooksNodes(self):
6498     """Build hooks nodes.
6499
6500     """
6501     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6502     return (nl, nl)
6503
6504   def CheckPrereq(self):
6505     """Check prerequisites.
6506
6507     This checks that the instance is in the cluster and is not running.
6508
6509     """
6510     self.op.instance_name = _ExpandInstanceName(self.cfg,
6511                                                 self.op.instance_name)
6512     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6513     assert instance is not None
6514     _CheckNodeOnline(self, instance.primary_node)
6515     _CheckInstanceDown(self, instance, "cannot rename")
6516     self.instance = instance
6517
6518     new_name = self.op.new_name
6519     if self.op.name_check:
6520       hostname = netutils.GetHostname(name=new_name)
6521       if hostname != new_name:
6522         self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6523                      hostname.name)
6524       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6525         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6526                                     " same as given hostname '%s'") %
6527                                     (hostname.name, self.op.new_name),
6528                                     errors.ECODE_INVAL)
6529       new_name = self.op.new_name = hostname.name
6530       if (self.op.ip_check and
6531           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6532         raise errors.OpPrereqError("IP %s of instance %s already in use" %
6533                                    (hostname.ip, new_name),
6534                                    errors.ECODE_NOTUNIQUE)
6535
6536     instance_list = self.cfg.GetInstanceList()
6537     if new_name in instance_list and new_name != instance.name:
6538       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6539                                  new_name, errors.ECODE_EXISTS)
6540
6541   def Exec(self, feedback_fn):
6542     """Rename the instance.
6543
6544     """
6545     inst = self.instance
6546     old_name = inst.name
6547
6548     rename_file_storage = False
6549     if (inst.disk_template in constants.DTS_FILEBASED and
6550         self.op.new_name != inst.name):
6551       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6552       rename_file_storage = True
6553
6554     self.cfg.RenameInstance(inst.name, self.op.new_name)
6555     # Change the instance lock. This is definitely safe while we hold the BGL.
6556     # Otherwise the new lock would have to be added in acquired mode.
6557     assert self.REQ_BGL
6558     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6559     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6560
6561     # re-read the instance from the configuration after rename
6562     inst = self.cfg.GetInstanceInfo(self.op.new_name)
6563
6564     if rename_file_storage:
6565       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6566       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6567                                                      old_file_storage_dir,
6568                                                      new_file_storage_dir)
6569       result.Raise("Could not rename on node %s directory '%s' to '%s'"
6570                    " (but the instance has been renamed in Ganeti)" %
6571                    (inst.primary_node, old_file_storage_dir,
6572                     new_file_storage_dir))
6573
6574     _StartInstanceDisks(self, inst, None)
6575     try:
6576       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6577                                                  old_name, self.op.debug_level)
6578       msg = result.fail_msg
6579       if msg:
6580         msg = ("Could not run OS rename script for instance %s on node %s"
6581                " (but the instance has been renamed in Ganeti): %s" %
6582                (inst.name, inst.primary_node, msg))
6583         self.proc.LogWarning(msg)
6584     finally:
6585       _ShutdownInstanceDisks(self, inst)
6586
6587     return inst.name
6588
6589
6590 class LUInstanceRemove(LogicalUnit):
6591   """Remove an instance.
6592
6593   """
6594   HPATH = "instance-remove"
6595   HTYPE = constants.HTYPE_INSTANCE
6596   REQ_BGL = False
6597
6598   def ExpandNames(self):
6599     self._ExpandAndLockInstance()
6600     self.needed_locks[locking.LEVEL_NODE] = []
6601     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6602
6603   def DeclareLocks(self, level):
6604     if level == locking.LEVEL_NODE:
6605       self._LockInstancesNodes()
6606
6607   def BuildHooksEnv(self):
6608     """Build hooks env.
6609
6610     This runs on master, primary and secondary nodes of the instance.
6611
6612     """
6613     env = _BuildInstanceHookEnvByObject(self, self.instance)
6614     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6615     return env
6616
6617   def BuildHooksNodes(self):
6618     """Build hooks nodes.
6619
6620     """
6621     nl = [self.cfg.GetMasterNode()]
6622     nl_post = list(self.instance.all_nodes) + nl
6623     return (nl, nl_post)
6624
6625   def CheckPrereq(self):
6626     """Check prerequisites.
6627
6628     This checks that the instance is in the cluster.
6629
6630     """
6631     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6632     assert self.instance is not None, \
6633       "Cannot retrieve locked instance %s" % self.op.instance_name
6634
6635   def Exec(self, feedback_fn):
6636     """Remove the instance.
6637
6638     """
6639     instance = self.instance
6640     logging.info("Shutting down instance %s on node %s",
6641                  instance.name, instance.primary_node)
6642
6643     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6644                                              self.op.shutdown_timeout)
6645     msg = result.fail_msg
6646     if msg:
6647       if self.op.ignore_failures:
6648         feedback_fn("Warning: can't shutdown instance: %s" % msg)
6649       else:
6650         raise errors.OpExecError("Could not shutdown instance %s on"
6651                                  " node %s: %s" %
6652                                  (instance.name, instance.primary_node, msg))
6653
6654     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6655
6656
6657 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6658   """Utility function to remove an instance.
6659
6660   """
6661   logging.info("Removing block devices for instance %s", instance.name)
6662
6663   if not _RemoveDisks(lu, instance):
6664     if not ignore_failures:
6665       raise errors.OpExecError("Can't remove instance's disks")
6666     feedback_fn("Warning: can't remove instance's disks")
6667
6668   logging.info("Removing instance %s out of cluster config", instance.name)
6669
6670   lu.cfg.RemoveInstance(instance.name)
6671
6672   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6673     "Instance lock removal conflict"
6674
6675   # Remove lock for the instance
6676   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6677
6678
6679 class LUInstanceQuery(NoHooksLU):
6680   """Logical unit for querying instances.
6681
6682   """
6683   # pylint: disable=W0142
6684   REQ_BGL = False
6685
6686   def CheckArguments(self):
6687     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6688                              self.op.output_fields, self.op.use_locking)
6689
6690   def ExpandNames(self):
6691     self.iq.ExpandNames(self)
6692
6693   def DeclareLocks(self, level):
6694     self.iq.DeclareLocks(self, level)
6695
6696   def Exec(self, feedback_fn):
6697     return self.iq.OldStyleQuery(self)
6698
6699
6700 class LUInstanceFailover(LogicalUnit):
6701   """Failover an instance.
6702
6703   """
6704   HPATH = "instance-failover"
6705   HTYPE = constants.HTYPE_INSTANCE
6706   REQ_BGL = False
6707
6708   def CheckArguments(self):
6709     """Check the arguments.
6710
6711     """
6712     self.iallocator = getattr(self.op, "iallocator", None)
6713     self.target_node = getattr(self.op, "target_node", None)
6714
6715   def ExpandNames(self):
6716     self._ExpandAndLockInstance()
6717
6718     if self.op.target_node is not None:
6719       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6720
6721     self.needed_locks[locking.LEVEL_NODE] = []
6722     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6723
6724     ignore_consistency = self.op.ignore_consistency
6725     shutdown_timeout = self.op.shutdown_timeout
6726     self._migrater = TLMigrateInstance(self, self.op.instance_name,
6727                                        cleanup=False,
6728                                        failover=True,
6729                                        ignore_consistency=ignore_consistency,
6730                                        shutdown_timeout=shutdown_timeout)
6731     self.tasklets = [self._migrater]
6732
6733   def DeclareLocks(self, level):
6734     if level == locking.LEVEL_NODE:
6735       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6736       if instance.disk_template in constants.DTS_EXT_MIRROR:
6737         if self.op.target_node is None:
6738           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6739         else:
6740           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6741                                                    self.op.target_node]
6742         del self.recalculate_locks[locking.LEVEL_NODE]
6743       else:
6744         self._LockInstancesNodes()
6745
6746   def BuildHooksEnv(self):
6747     """Build hooks env.
6748
6749     This runs on master, primary and secondary nodes of the instance.
6750
6751     """
6752     instance = self._migrater.instance
6753     source_node = instance.primary_node
6754     target_node = self.op.target_node
6755     env = {
6756       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6757       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6758       "OLD_PRIMARY": source_node,
6759       "NEW_PRIMARY": target_node,
6760       }
6761
6762     if instance.disk_template in constants.DTS_INT_MIRROR:
6763       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6764       env["NEW_SECONDARY"] = source_node
6765     else:
6766       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6767
6768     env.update(_BuildInstanceHookEnvByObject(self, instance))
6769
6770     return env
6771
6772   def BuildHooksNodes(self):
6773     """Build hooks nodes.
6774
6775     """
6776     instance = self._migrater.instance
6777     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6778     return (nl, nl + [instance.primary_node])
6779
6780
6781 class LUInstanceMigrate(LogicalUnit):
6782   """Migrate an instance.
6783
6784   This is migration without shutting down, compared to the failover,
6785   which is done with shutdown.
6786
6787   """
6788   HPATH = "instance-migrate"
6789   HTYPE = constants.HTYPE_INSTANCE
6790   REQ_BGL = False
6791
6792   def ExpandNames(self):
6793     self._ExpandAndLockInstance()
6794
6795     if self.op.target_node is not None:
6796       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6797
6798     self.needed_locks[locking.LEVEL_NODE] = []
6799     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6800
6801     self._migrater = TLMigrateInstance(self, self.op.instance_name,
6802                                        cleanup=self.op.cleanup,
6803                                        failover=False,
6804                                        fallback=self.op.allow_failover)
6805     self.tasklets = [self._migrater]
6806
6807   def DeclareLocks(self, level):
6808     if level == locking.LEVEL_NODE:
6809       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6810       if instance.disk_template in constants.DTS_EXT_MIRROR:
6811         if self.op.target_node is None:
6812           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6813         else:
6814           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6815                                                    self.op.target_node]
6816         del self.recalculate_locks[locking.LEVEL_NODE]
6817       else:
6818         self._LockInstancesNodes()
6819
6820   def BuildHooksEnv(self):
6821     """Build hooks env.
6822
6823     This runs on master, primary and secondary nodes of the instance.
6824
6825     """
6826     instance = self._migrater.instance
6827     source_node = instance.primary_node
6828     target_node = self.op.target_node
6829     env = _BuildInstanceHookEnvByObject(self, instance)
6830     env.update({
6831       "MIGRATE_LIVE": self._migrater.live,
6832       "MIGRATE_CLEANUP": self.op.cleanup,
6833       "OLD_PRIMARY": source_node,
6834       "NEW_PRIMARY": target_node,
6835       })
6836
6837     if instance.disk_template in constants.DTS_INT_MIRROR:
6838       env["OLD_SECONDARY"] = target_node
6839       env["NEW_SECONDARY"] = source_node
6840     else:
6841       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6842
6843     return env
6844
6845   def BuildHooksNodes(self):
6846     """Build hooks nodes.
6847
6848     """
6849     instance = self._migrater.instance
6850     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6851     return (nl, nl + [instance.primary_node])
6852
6853
6854 class LUInstanceMove(LogicalUnit):
6855   """Move an instance by data-copying.
6856
6857   """
6858   HPATH = "instance-move"
6859   HTYPE = constants.HTYPE_INSTANCE
6860   REQ_BGL = False
6861
6862   def ExpandNames(self):
6863     self._ExpandAndLockInstance()
6864     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6865     self.op.target_node = target_node
6866     self.needed_locks[locking.LEVEL_NODE] = [target_node]
6867     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6868
6869   def DeclareLocks(self, level):
6870     if level == locking.LEVEL_NODE:
6871       self._LockInstancesNodes(primary_only=True)
6872
6873   def BuildHooksEnv(self):
6874     """Build hooks env.
6875
6876     This runs on master, primary and secondary nodes of the instance.
6877
6878     """
6879     env = {
6880       "TARGET_NODE": self.op.target_node,
6881       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6882       }
6883     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6884     return env
6885
6886   def BuildHooksNodes(self):
6887     """Build hooks nodes.
6888
6889     """
6890     nl = [
6891       self.cfg.GetMasterNode(),
6892       self.instance.primary_node,
6893       self.op.target_node,
6894       ]
6895     return (nl, nl)
6896
6897   def CheckPrereq(self):
6898     """Check prerequisites.
6899
6900     This checks that the instance is in the cluster.
6901
6902     """
6903     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6904     assert self.instance is not None, \
6905       "Cannot retrieve locked instance %s" % self.op.instance_name
6906
6907     node = self.cfg.GetNodeInfo(self.op.target_node)
6908     assert node is not None, \
6909       "Cannot retrieve locked node %s" % self.op.target_node
6910
6911     self.target_node = target_node = node.name
6912
6913     if target_node == instance.primary_node:
6914       raise errors.OpPrereqError("Instance %s is already on the node %s" %
6915                                  (instance.name, target_node),
6916                                  errors.ECODE_STATE)
6917
6918     bep = self.cfg.GetClusterInfo().FillBE(instance)
6919
6920     for idx, dsk in enumerate(instance.disks):
6921       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6922         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6923                                    " cannot copy" % idx, errors.ECODE_STATE)
6924
6925     _CheckNodeOnline(self, target_node)
6926     _CheckNodeNotDrained(self, target_node)
6927     _CheckNodeVmCapable(self, target_node)
6928
6929     if instance.admin_up:
6930       # check memory requirements on the secondary node
6931       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6932                            instance.name, bep[constants.BE_MEMORY],
6933                            instance.hypervisor)
6934     else:
6935       self.LogInfo("Not checking memory on the secondary node as"
6936                    " instance will not be started")
6937
6938     # check bridge existance
6939     _CheckInstanceBridgesExist(self, instance, node=target_node)
6940
6941   def Exec(self, feedback_fn):
6942     """Move an instance.
6943
6944     The move is done by shutting it down on its present node, copying
6945     the data over (slow) and starting it on the new node.
6946
6947     """
6948     instance = self.instance
6949
6950     source_node = instance.primary_node
6951     target_node = self.target_node
6952
6953     self.LogInfo("Shutting down instance %s on source node %s",
6954                  instance.name, source_node)
6955
6956     result = self.rpc.call_instance_shutdown(source_node, instance,
6957                                              self.op.shutdown_timeout)
6958     msg = result.fail_msg
6959     if msg:
6960       if self.op.ignore_consistency:
6961         self.proc.LogWarning("Could not shutdown instance %s on node %s."
6962                              " Proceeding anyway. Please make sure node"
6963                              " %s is down. Error details: %s",
6964                              instance.name, source_node, source_node, msg)
6965       else:
6966         raise errors.OpExecError("Could not shutdown instance %s on"
6967                                  " node %s: %s" %
6968                                  (instance.name, source_node, msg))
6969
6970     # create the target disks
6971     try:
6972       _CreateDisks(self, instance, target_node=target_node)
6973     except errors.OpExecError:
6974       self.LogWarning("Device creation failed, reverting...")
6975       try:
6976         _RemoveDisks(self, instance, target_node=target_node)
6977       finally:
6978         self.cfg.ReleaseDRBDMinors(instance.name)
6979         raise
6980
6981     cluster_name = self.cfg.GetClusterInfo().cluster_name
6982
6983     errs = []
6984     # activate, get path, copy the data over
6985     for idx, disk in enumerate(instance.disks):
6986       self.LogInfo("Copying data for disk %d", idx)
6987       result = self.rpc.call_blockdev_assemble(target_node, disk,
6988                                                instance.name, True, idx)
6989       if result.fail_msg:
6990         self.LogWarning("Can't assemble newly created disk %d: %s",
6991                         idx, result.fail_msg)
6992         errs.append(result.fail_msg)
6993         break
6994       dev_path = result.payload
6995       result = self.rpc.call_blockdev_export(source_node, disk,
6996                                              target_node, dev_path,
6997                                              cluster_name)
6998       if result.fail_msg:
6999         self.LogWarning("Can't copy data over for disk %d: %s",
7000                         idx, result.fail_msg)
7001         errs.append(result.fail_msg)
7002         break
7003
7004     if errs:
7005       self.LogWarning("Some disks failed to copy, aborting")
7006       try:
7007         _RemoveDisks(self, instance, target_node=target_node)
7008       finally:
7009         self.cfg.ReleaseDRBDMinors(instance.name)
7010         raise errors.OpExecError("Errors during disk copy: %s" %
7011                                  (",".join(errs),))
7012
7013     instance.primary_node = target_node
7014     self.cfg.Update(instance, feedback_fn)
7015
7016     self.LogInfo("Removing the disks on the original node")
7017     _RemoveDisks(self, instance, target_node=source_node)
7018
7019     # Only start the instance if it's marked as up
7020     if instance.admin_up:
7021       self.LogInfo("Starting instance %s on node %s",
7022                    instance.name, target_node)
7023
7024       disks_ok, _ = _AssembleInstanceDisks(self, instance,
7025                                            ignore_secondaries=True)
7026       if not disks_ok:
7027         _ShutdownInstanceDisks(self, instance)
7028         raise errors.OpExecError("Can't activate the instance's disks")
7029
7030       result = self.rpc.call_instance_start(target_node, instance,
7031                                             None, None, False)
7032       msg = result.fail_msg
7033       if msg:
7034         _ShutdownInstanceDisks(self, instance)
7035         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7036                                  (instance.name, target_node, msg))
7037
7038
7039 class LUNodeMigrate(LogicalUnit):
7040   """Migrate all instances from a node.
7041
7042   """
7043   HPATH = "node-migrate"
7044   HTYPE = constants.HTYPE_NODE
7045   REQ_BGL = False
7046
7047   def CheckArguments(self):
7048     pass
7049
7050   def ExpandNames(self):
7051     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7052
7053     self.share_locks = _ShareAll()
7054     self.needed_locks = {
7055       locking.LEVEL_NODE: [self.op.node_name],
7056       }
7057
7058   def BuildHooksEnv(self):
7059     """Build hooks env.
7060
7061     This runs on the master, the primary and all the secondaries.
7062
7063     """
7064     return {
7065       "NODE_NAME": self.op.node_name,
7066       }
7067
7068   def BuildHooksNodes(self):
7069     """Build hooks nodes.
7070
7071     """
7072     nl = [self.cfg.GetMasterNode()]
7073     return (nl, nl)
7074
7075   def CheckPrereq(self):
7076     pass
7077
7078   def Exec(self, feedback_fn):
7079     # Prepare jobs for migration instances
7080     jobs = [
7081       [opcodes.OpInstanceMigrate(instance_name=inst.name,
7082                                  mode=self.op.mode,
7083                                  live=self.op.live,
7084                                  iallocator=self.op.iallocator,
7085                                  target_node=self.op.target_node)]
7086       for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7087       ]
7088
7089     # TODO: Run iallocator in this opcode and pass correct placement options to
7090     # OpInstanceMigrate. Since other jobs can modify the cluster between
7091     # running the iallocator and the actual migration, a good consistency model
7092     # will have to be found.
7093
7094     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7095             frozenset([self.op.node_name]))
7096
7097     return ResultWithJobs(jobs)
7098
7099
7100 class TLMigrateInstance(Tasklet):
7101   """Tasklet class for instance migration.
7102
7103   @type live: boolean
7104   @ivar live: whether the migration will be done live or non-live;
7105       this variable is initalized only after CheckPrereq has run
7106   @type cleanup: boolean
7107   @ivar cleanup: Wheater we cleanup from a failed migration
7108   @type iallocator: string
7109   @ivar iallocator: The iallocator used to determine target_node
7110   @type target_node: string
7111   @ivar target_node: If given, the target_node to reallocate the instance to
7112   @type failover: boolean
7113   @ivar failover: Whether operation results in failover or migration
7114   @type fallback: boolean
7115   @ivar fallback: Whether fallback to failover is allowed if migration not
7116                   possible
7117   @type ignore_consistency: boolean
7118   @ivar ignore_consistency: Wheter we should ignore consistency between source
7119                             and target node
7120   @type shutdown_timeout: int
7121   @ivar shutdown_timeout: In case of failover timeout of the shutdown
7122
7123   """
7124
7125   # Constants
7126   _MIGRATION_POLL_INTERVAL = 1      # seconds
7127   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7128
7129   def __init__(self, lu, instance_name, cleanup=False,
7130                failover=False, fallback=False,
7131                ignore_consistency=False,
7132                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7133     """Initializes this class.
7134
7135     """
7136     Tasklet.__init__(self, lu)
7137
7138     # Parameters
7139     self.instance_name = instance_name
7140     self.cleanup = cleanup
7141     self.live = False # will be overridden later
7142     self.failover = failover
7143     self.fallback = fallback
7144     self.ignore_consistency = ignore_consistency
7145     self.shutdown_timeout = shutdown_timeout
7146
7147   def CheckPrereq(self):
7148     """Check prerequisites.
7149
7150     This checks that the instance is in the cluster.
7151
7152     """
7153     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7154     instance = self.cfg.GetInstanceInfo(instance_name)
7155     assert instance is not None
7156     self.instance = instance
7157
7158     if (not self.cleanup and not instance.admin_up and not self.failover and
7159         self.fallback):
7160       self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7161                       " to failover")
7162       self.failover = True
7163
7164     if instance.disk_template not in constants.DTS_MIRRORED:
7165       if self.failover:
7166         text = "failovers"
7167       else:
7168         text = "migrations"
7169       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7170                                  " %s" % (instance.disk_template, text),
7171                                  errors.ECODE_STATE)
7172
7173     if instance.disk_template in constants.DTS_EXT_MIRROR:
7174       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7175
7176       if self.lu.op.iallocator:
7177         self._RunAllocator()
7178       else:
7179         # We set set self.target_node as it is required by
7180         # BuildHooksEnv
7181         self.target_node = self.lu.op.target_node
7182
7183       # self.target_node is already populated, either directly or by the
7184       # iallocator run
7185       target_node = self.target_node
7186       if self.target_node == instance.primary_node:
7187         raise errors.OpPrereqError("Cannot migrate instance %s"
7188                                    " to its primary (%s)" %
7189                                    (instance.name, instance.primary_node))
7190
7191       if len(self.lu.tasklets) == 1:
7192         # It is safe to release locks only when we're the only tasklet
7193         # in the LU
7194         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7195                       keep=[instance.primary_node, self.target_node])
7196
7197     else:
7198       secondary_nodes = instance.secondary_nodes
7199       if not secondary_nodes:
7200         raise errors.ConfigurationError("No secondary node but using"
7201                                         " %s disk template" %
7202                                         instance.disk_template)
7203       target_node = secondary_nodes[0]
7204       if self.lu.op.iallocator or (self.lu.op.target_node and
7205                                    self.lu.op.target_node != target_node):
7206         if self.failover:
7207           text = "failed over"
7208         else:
7209           text = "migrated"
7210         raise errors.OpPrereqError("Instances with disk template %s cannot"
7211                                    " be %s to arbitrary nodes"
7212                                    " (neither an iallocator nor a target"
7213                                    " node can be passed)" %
7214                                    (instance.disk_template, text),
7215                                    errors.ECODE_INVAL)
7216
7217     i_be = self.cfg.GetClusterInfo().FillBE(instance)
7218
7219     # check memory requirements on the secondary node
7220     if not self.failover or instance.admin_up:
7221       _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7222                            instance.name, i_be[constants.BE_MEMORY],
7223                            instance.hypervisor)
7224     else:
7225       self.lu.LogInfo("Not checking memory on the secondary node as"
7226                       " instance will not be started")
7227
7228     # check bridge existance
7229     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7230
7231     if not self.cleanup:
7232       _CheckNodeNotDrained(self.lu, target_node)
7233       if not self.failover:
7234         result = self.rpc.call_instance_migratable(instance.primary_node,
7235                                                    instance)
7236         if result.fail_msg and self.fallback:
7237           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7238                           " failover")
7239           self.failover = True
7240         else:
7241           result.Raise("Can't migrate, please use failover",
7242                        prereq=True, ecode=errors.ECODE_STATE)
7243
7244     assert not (self.failover and self.cleanup)
7245
7246     if not self.failover:
7247       if self.lu.op.live is not None and self.lu.op.mode is not None:
7248         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7249                                    " parameters are accepted",
7250                                    errors.ECODE_INVAL)
7251       if self.lu.op.live is not None:
7252         if self.lu.op.live:
7253           self.lu.op.mode = constants.HT_MIGRATION_LIVE
7254         else:
7255           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7256         # reset the 'live' parameter to None so that repeated
7257         # invocations of CheckPrereq do not raise an exception
7258         self.lu.op.live = None
7259       elif self.lu.op.mode is None:
7260         # read the default value from the hypervisor
7261         i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7262                                                 skip_globals=False)
7263         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7264
7265       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7266     else:
7267       # Failover is never live
7268       self.live = False
7269
7270   def _RunAllocator(self):
7271     """Run the allocator based on input opcode.
7272
7273     """
7274     ial = IAllocator(self.cfg, self.rpc,
7275                      mode=constants.IALLOCATOR_MODE_RELOC,
7276                      name=self.instance_name,
7277                      # TODO See why hail breaks with a single node below
7278                      relocate_from=[self.instance.primary_node,
7279                                     self.instance.primary_node],
7280                      )
7281
7282     ial.Run(self.lu.op.iallocator)
7283
7284     if not ial.success:
7285       raise errors.OpPrereqError("Can't compute nodes using"
7286                                  " iallocator '%s': %s" %
7287                                  (self.lu.op.iallocator, ial.info),
7288                                  errors.ECODE_NORES)
7289     if len(ial.result) != ial.required_nodes:
7290       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7291                                  " of nodes (%s), required %s" %
7292                                  (self.lu.op.iallocator, len(ial.result),
7293                                   ial.required_nodes), errors.ECODE_FAULT)
7294     self.target_node = ial.result[0]
7295     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7296                  self.instance_name, self.lu.op.iallocator,
7297                  utils.CommaJoin(ial.result))
7298
7299   def _WaitUntilSync(self):
7300     """Poll with custom rpc for disk sync.
7301
7302     This uses our own step-based rpc call.
7303
7304     """
7305     self.feedback_fn("* wait until resync is done")
7306     all_done = False
7307     while not all_done:
7308       all_done = True
7309       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7310                                             self.nodes_ip,
7311                                             self.instance.disks)
7312       min_percent = 100
7313       for node, nres in result.items():
7314         nres.Raise("Cannot resync disks on node %s" % node)
7315         node_done, node_percent = nres.payload
7316         all_done = all_done and node_done
7317         if node_percent is not None:
7318           min_percent = min(min_percent, node_percent)
7319       if not all_done:
7320         if min_percent < 100:
7321           self.feedback_fn("   - progress: %.1f%%" % min_percent)
7322         time.sleep(2)
7323
7324   def _EnsureSecondary(self, node):
7325     """Demote a node to secondary.
7326
7327     """
7328     self.feedback_fn("* switching node %s to secondary mode" % node)
7329
7330     for dev in self.instance.disks:
7331       self.cfg.SetDiskID(dev, node)
7332
7333     result = self.rpc.call_blockdev_close(node, self.instance.name,
7334                                           self.instance.disks)
7335     result.Raise("Cannot change disk to secondary on node %s" % node)
7336
7337   def _GoStandalone(self):
7338     """Disconnect from the network.
7339
7340     """
7341     self.feedback_fn("* changing into standalone mode")
7342     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7343                                                self.instance.disks)
7344     for node, nres in result.items():
7345       nres.Raise("Cannot disconnect disks node %s" % node)
7346
7347   def _GoReconnect(self, multimaster):
7348     """Reconnect to the network.
7349
7350     """
7351     if multimaster:
7352       msg = "dual-master"
7353     else:
7354       msg = "single-master"
7355     self.feedback_fn("* changing disks into %s mode" % msg)
7356     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7357                                            self.instance.disks,
7358                                            self.instance.name, multimaster)
7359     for node, nres in result.items():
7360       nres.Raise("Cannot change disks config on node %s" % node)
7361
7362   def _ExecCleanup(self):
7363     """Try to cleanup after a failed migration.
7364
7365     The cleanup is done by:
7366       - check that the instance is running only on one node
7367         (and update the config if needed)
7368       - change disks on its secondary node to secondary
7369       - wait until disks are fully synchronized
7370       - disconnect from the network
7371       - change disks into single-master mode
7372       - wait again until disks are fully synchronized
7373
7374     """
7375     instance = self.instance
7376     target_node = self.target_node
7377     source_node = self.source_node
7378
7379     # check running on only one node
7380     self.feedback_fn("* checking where the instance actually runs"
7381                      " (if this hangs, the hypervisor might be in"
7382                      " a bad state)")
7383     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7384     for node, result in ins_l.items():
7385       result.Raise("Can't contact node %s" % node)
7386
7387     runningon_source = instance.name in ins_l[source_node].payload
7388     runningon_target = instance.name in ins_l[target_node].payload
7389
7390     if runningon_source and runningon_target:
7391       raise errors.OpExecError("Instance seems to be running on two nodes,"
7392                                " or the hypervisor is confused; you will have"
7393                                " to ensure manually that it runs only on one"
7394                                " and restart this operation")
7395
7396     if not (runningon_source or runningon_target):
7397       raise errors.OpExecError("Instance does not seem to be running at all;"
7398                                " in this case it's safer to repair by"
7399                                " running 'gnt-instance stop' to ensure disk"
7400                                " shutdown, and then restarting it")
7401
7402     if runningon_target:
7403       # the migration has actually succeeded, we need to update the config
7404       self.feedback_fn("* instance running on secondary node (%s),"
7405                        " updating config" % target_node)
7406       instance.primary_node = target_node
7407       self.cfg.Update(instance, self.feedback_fn)
7408       demoted_node = source_node
7409     else:
7410       self.feedback_fn("* instance confirmed to be running on its"
7411                        " primary node (%s)" % source_node)
7412       demoted_node = target_node
7413
7414     if instance.disk_template in constants.DTS_INT_MIRROR:
7415       self._EnsureSecondary(demoted_node)
7416       try:
7417         self._WaitUntilSync()
7418       except errors.OpExecError:
7419         # we ignore here errors, since if the device is standalone, it
7420         # won't be able to sync
7421         pass
7422       self._GoStandalone()
7423       self._GoReconnect(False)
7424       self._WaitUntilSync()
7425
7426     self.feedback_fn("* done")
7427
7428   def _RevertDiskStatus(self):
7429     """Try to revert the disk status after a failed migration.
7430
7431     """
7432     target_node = self.target_node
7433     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7434       return
7435
7436     try:
7437       self._EnsureSecondary(target_node)
7438       self._GoStandalone()
7439       self._GoReconnect(False)
7440       self._WaitUntilSync()
7441     except errors.OpExecError, err:
7442       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7443                          " please try to recover the instance manually;"
7444                          " error '%s'" % str(err))
7445
7446   def _AbortMigration(self):
7447     """Call the hypervisor code to abort a started migration.
7448
7449     """
7450     instance = self.instance
7451     target_node = self.target_node
7452     source_node = self.source_node
7453     migration_info = self.migration_info
7454
7455     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7456                                                                  instance,
7457                                                                  migration_info,
7458                                                                  False)
7459     abort_msg = abort_result.fail_msg
7460     if abort_msg:
7461       logging.error("Aborting migration failed on target node %s: %s",
7462                     target_node, abort_msg)
7463       # Don't raise an exception here, as we stil have to try to revert the
7464       # disk status, even if this step failed.
7465
7466     abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7467         instance, False, self.live)
7468     abort_msg = abort_result.fail_msg
7469     if abort_msg:
7470       logging.error("Aborting migration failed on source node %s: %s",
7471                     source_node, abort_msg)
7472
7473   def _ExecMigration(self):
7474     """Migrate an instance.
7475
7476     The migrate is done by:
7477       - change the disks into dual-master mode
7478       - wait until disks are fully synchronized again
7479       - migrate the instance
7480       - change disks on the new secondary node (the old primary) to secondary
7481       - wait until disks are fully synchronized
7482       - change disks into single-master mode
7483
7484     """
7485     instance = self.instance
7486     target_node = self.target_node
7487     source_node = self.source_node
7488
7489     # Check for hypervisor version mismatch and warn the user.
7490     nodeinfo = self.rpc.call_node_info([source_node, target_node],
7491                                        None, self.instance.hypervisor)
7492     src_info = nodeinfo[source_node]
7493     dst_info = nodeinfo[target_node]
7494
7495     if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7496         (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7497       src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7498       dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7499       if src_version != dst_version:
7500         self.feedback_fn("* warning: hypervisor version mismatch between"
7501                          " source (%s) and target (%s) node" %
7502                          (src_version, dst_version))
7503
7504     self.feedback_fn("* checking disk consistency between source and target")
7505     for dev in instance.disks:
7506       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7507         raise errors.OpExecError("Disk %s is degraded or not fully"
7508                                  " synchronized on target node,"
7509                                  " aborting migration" % dev.iv_name)
7510
7511     # First get the migration information from the remote node
7512     result = self.rpc.call_migration_info(source_node, instance)
7513     msg = result.fail_msg
7514     if msg:
7515       log_err = ("Failed fetching source migration information from %s: %s" %
7516                  (source_node, msg))
7517       logging.error(log_err)
7518       raise errors.OpExecError(log_err)
7519
7520     self.migration_info = migration_info = result.payload
7521
7522     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7523       # Then switch the disks to master/master mode
7524       self._EnsureSecondary(target_node)
7525       self._GoStandalone()
7526       self._GoReconnect(True)
7527       self._WaitUntilSync()
7528
7529     self.feedback_fn("* preparing %s to accept the instance" % target_node)
7530     result = self.rpc.call_accept_instance(target_node,
7531                                            instance,
7532                                            migration_info,
7533                                            self.nodes_ip[target_node])
7534
7535     msg = result.fail_msg
7536     if msg:
7537       logging.error("Instance pre-migration failed, trying to revert"
7538                     " disk status: %s", msg)
7539       self.feedback_fn("Pre-migration failed, aborting")
7540       self._AbortMigration()
7541       self._RevertDiskStatus()
7542       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7543                                (instance.name, msg))
7544
7545     self.feedback_fn("* migrating instance to %s" % target_node)
7546     result = self.rpc.call_instance_migrate(source_node, instance,
7547                                             self.nodes_ip[target_node],
7548                                             self.live)
7549     msg = result.fail_msg
7550     if msg:
7551       logging.error("Instance migration failed, trying to revert"
7552                     " disk status: %s", msg)
7553       self.feedback_fn("Migration failed, aborting")
7554       self._AbortMigration()
7555       self._RevertDiskStatus()
7556       raise errors.OpExecError("Could not migrate instance %s: %s" %
7557                                (instance.name, msg))
7558
7559     self.feedback_fn("* starting memory transfer")
7560     last_feedback = time.time()
7561     while True:
7562       result = self.rpc.call_instance_get_migration_status(source_node,
7563                                                            instance)
7564       msg = result.fail_msg
7565       ms = result.payload   # MigrationStatus instance
7566       if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7567         logging.error("Instance migration failed, trying to revert"
7568                       " disk status: %s", msg)
7569         self.feedback_fn("Migration failed, aborting")
7570         self._AbortMigration()
7571         self._RevertDiskStatus()
7572         raise errors.OpExecError("Could not migrate instance %s: %s" %
7573                                  (instance.name, msg))
7574
7575       if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7576         self.feedback_fn("* memory transfer complete")
7577         break
7578
7579       if (utils.TimeoutExpired(last_feedback,
7580                                self._MIGRATION_FEEDBACK_INTERVAL) and
7581           ms.transferred_ram is not None):
7582         mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7583         self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7584         last_feedback = time.time()
7585
7586       time.sleep(self._MIGRATION_POLL_INTERVAL)
7587
7588     result = self.rpc.call_instance_finalize_migration_src(source_node,
7589                                                            instance,
7590                                                            True,
7591                                                            self.live)
7592     msg = result.fail_msg
7593     if msg:
7594       logging.error("Instance migration succeeded, but finalization failed"
7595                     " on the source node: %s", msg)
7596       raise errors.OpExecError("Could not finalize instance migration: %s" %
7597                                msg)
7598
7599     instance.primary_node = target_node
7600
7601     # distribute new instance config to the other nodes
7602     self.cfg.Update(instance, self.feedback_fn)
7603
7604     result = self.rpc.call_instance_finalize_migration_dst(target_node,
7605                                                            instance,
7606                                                            migration_info,
7607                                                            True)
7608     msg = result.fail_msg
7609     if msg:
7610       logging.error("Instance migration succeeded, but finalization failed"
7611                     " on the target node: %s", msg)
7612       raise errors.OpExecError("Could not finalize instance migration: %s" %
7613                                msg)
7614
7615     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7616       self._EnsureSecondary(source_node)
7617       self._WaitUntilSync()
7618       self._GoStandalone()
7619       self._GoReconnect(False)
7620       self._WaitUntilSync()
7621
7622     self.feedback_fn("* done")
7623
7624   def _ExecFailover(self):
7625     """Failover an instance.
7626
7627     The failover is done by shutting it down on its present node and
7628     starting it on the secondary.
7629
7630     """
7631     instance = self.instance
7632     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7633
7634     source_node = instance.primary_node
7635     target_node = self.target_node
7636
7637     if instance.admin_up:
7638       self.feedback_fn("* checking disk consistency between source and target")
7639       for dev in instance.disks:
7640         # for drbd, these are drbd over lvm
7641         if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7642           if primary_node.offline:
7643             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7644                              " target node %s" %
7645                              (primary_node.name, dev.iv_name, target_node))
7646           elif not self.ignore_consistency:
7647             raise errors.OpExecError("Disk %s is degraded on target node,"
7648                                      " aborting failover" % dev.iv_name)
7649     else:
7650       self.feedback_fn("* not checking disk consistency as instance is not"
7651                        " running")
7652
7653     self.feedback_fn("* shutting down instance on source node")
7654     logging.info("Shutting down instance %s on node %s",
7655                  instance.name, source_node)
7656
7657     result = self.rpc.call_instance_shutdown(source_node, instance,
7658                                              self.shutdown_timeout)
7659     msg = result.fail_msg
7660     if msg:
7661       if self.ignore_consistency or primary_node.offline:
7662         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7663                            " proceeding anyway; please make sure node"
7664                            " %s is down; error details: %s",
7665                            instance.name, source_node, source_node, msg)
7666       else:
7667         raise errors.OpExecError("Could not shutdown instance %s on"
7668                                  " node %s: %s" %
7669                                  (instance.name, source_node, msg))
7670
7671     self.feedback_fn("* deactivating the instance's disks on source node")
7672     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7673       raise errors.OpExecError("Can't shut down the instance's disks")
7674
7675     instance.primary_node = target_node
7676     # distribute new instance config to the other nodes
7677     self.cfg.Update(instance, self.feedback_fn)
7678
7679     # Only start the instance if it's marked as up
7680     if instance.admin_up:
7681       self.feedback_fn("* activating the instance's disks on target node %s" %
7682                        target_node)
7683       logging.info("Starting instance %s on node %s",
7684                    instance.name, target_node)
7685
7686       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7687                                            ignore_secondaries=True)
7688       if not disks_ok:
7689         _ShutdownInstanceDisks(self.lu, instance)
7690         raise errors.OpExecError("Can't activate the instance's disks")
7691
7692       self.feedback_fn("* starting the instance on the target node %s" %
7693                        target_node)
7694       result = self.rpc.call_instance_start(target_node, instance, None, None,
7695                                             False)
7696       msg = result.fail_msg
7697       if msg:
7698         _ShutdownInstanceDisks(self.lu, instance)
7699         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7700                                  (instance.name, target_node, msg))
7701
7702   def Exec(self, feedback_fn):
7703     """Perform the migration.
7704
7705     """
7706     self.feedback_fn = feedback_fn
7707     self.source_node = self.instance.primary_node
7708
7709     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7710     if self.instance.disk_template in constants.DTS_INT_MIRROR:
7711       self.target_node = self.instance.secondary_nodes[0]
7712       # Otherwise self.target_node has been populated either
7713       # directly, or through an iallocator.
7714
7715     self.all_nodes = [self.source_node, self.target_node]
7716     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7717                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
7718
7719     if self.failover:
7720       feedback_fn("Failover instance %s" % self.instance.name)
7721       self._ExecFailover()
7722     else:
7723       feedback_fn("Migrating instance %s" % self.instance.name)
7724
7725       if self.cleanup:
7726         return self._ExecCleanup()
7727       else:
7728         return self._ExecMigration()
7729
7730
7731 def _CreateBlockDev(lu, node, instance, device, force_create,
7732                     info, force_open):
7733   """Create a tree of block devices on a given node.
7734
7735   If this device type has to be created on secondaries, create it and
7736   all its children.
7737
7738   If not, just recurse to children keeping the same 'force' value.
7739
7740   @param lu: the lu on whose behalf we execute
7741   @param node: the node on which to create the device
7742   @type instance: L{objects.Instance}
7743   @param instance: the instance which owns the device
7744   @type device: L{objects.Disk}
7745   @param device: the device to create
7746   @type force_create: boolean
7747   @param force_create: whether to force creation of this device; this
7748       will be change to True whenever we find a device which has
7749       CreateOnSecondary() attribute
7750   @param info: the extra 'metadata' we should attach to the device
7751       (this will be represented as a LVM tag)
7752   @type force_open: boolean
7753   @param force_open: this parameter will be passes to the
7754       L{backend.BlockdevCreate} function where it specifies
7755       whether we run on primary or not, and it affects both
7756       the child assembly and the device own Open() execution
7757
7758   """
7759   if device.CreateOnSecondary():
7760     force_create = True
7761
7762   if device.children:
7763     for child in device.children:
7764       _CreateBlockDev(lu, node, instance, child, force_create,
7765                       info, force_open)
7766
7767   if not force_create:
7768     return
7769
7770   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7771
7772
7773 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7774   """Create a single block device on a given node.
7775
7776   This will not recurse over children of the device, so they must be
7777   created in advance.
7778
7779   @param lu: the lu on whose behalf we execute
7780   @param node: the node on which to create the device
7781   @type instance: L{objects.Instance}
7782   @param instance: the instance which owns the device
7783   @type device: L{objects.Disk}
7784   @param device: the device to create
7785   @param info: the extra 'metadata' we should attach to the device
7786       (this will be represented as a LVM tag)
7787   @type force_open: boolean
7788   @param force_open: this parameter will be passes to the
7789       L{backend.BlockdevCreate} function where it specifies
7790       whether we run on primary or not, and it affects both
7791       the child assembly and the device own Open() execution
7792
7793   """
7794   lu.cfg.SetDiskID(device, node)
7795   result = lu.rpc.call_blockdev_create(node, device, device.size,
7796                                        instance.name, force_open, info)
7797   result.Raise("Can't create block device %s on"
7798                " node %s for instance %s" % (device, node, instance.name))
7799   if device.physical_id is None:
7800     device.physical_id = result.payload
7801
7802
7803 def _GenerateUniqueNames(lu, exts):
7804   """Generate a suitable LV name.
7805
7806   This will generate a logical volume name for the given instance.
7807
7808   """
7809   results = []
7810   for val in exts:
7811     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7812     results.append("%s%s" % (new_id, val))
7813   return results
7814
7815
7816 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7817                          iv_name, p_minor, s_minor):
7818   """Generate a drbd8 device complete with its children.
7819
7820   """
7821   assert len(vgnames) == len(names) == 2
7822   port = lu.cfg.AllocatePort()
7823   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7824   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7825                           logical_id=(vgnames[0], names[0]))
7826   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7827                           logical_id=(vgnames[1], names[1]))
7828   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7829                           logical_id=(primary, secondary, port,
7830                                       p_minor, s_minor,
7831                                       shared_secret),
7832                           children=[dev_data, dev_meta],
7833                           iv_name=iv_name)
7834   return drbd_dev
7835
7836
7837 def _GenerateDiskTemplate(lu, template_name,
7838                           instance_name, primary_node,
7839                           secondary_nodes, disk_info,
7840                           file_storage_dir, file_driver,
7841                           base_index, feedback_fn):
7842   """Generate the entire disk layout for a given template type.
7843
7844   """
7845   #TODO: compute space requirements
7846
7847   vgname = lu.cfg.GetVGName()
7848   disk_count = len(disk_info)
7849   disks = []
7850   if template_name == constants.DT_DISKLESS:
7851     pass
7852   elif template_name == constants.DT_PLAIN:
7853     if len(secondary_nodes) != 0:
7854       raise errors.ProgrammerError("Wrong template configuration")
7855
7856     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7857                                       for i in range(disk_count)])
7858     for idx, disk in enumerate(disk_info):
7859       disk_index = idx + base_index
7860       vg = disk.get(constants.IDISK_VG, vgname)
7861       feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7862       disk_dev = objects.Disk(dev_type=constants.LD_LV,
7863                               size=disk[constants.IDISK_SIZE],
7864                               logical_id=(vg, names[idx]),
7865                               iv_name="disk/%d" % disk_index,
7866                               mode=disk[constants.IDISK_MODE])
7867       disks.append(disk_dev)
7868   elif template_name == constants.DT_DRBD8:
7869     if len(secondary_nodes) != 1:
7870       raise errors.ProgrammerError("Wrong template configuration")
7871     remote_node = secondary_nodes[0]
7872     minors = lu.cfg.AllocateDRBDMinor(
7873       [primary_node, remote_node] * len(disk_info), instance_name)
7874
7875     names = []
7876     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7877                                                for i in range(disk_count)]):
7878       names.append(lv_prefix + "_data")
7879       names.append(lv_prefix + "_meta")
7880     for idx, disk in enumerate(disk_info):
7881       disk_index = idx + base_index
7882       data_vg = disk.get(constants.IDISK_VG, vgname)
7883       meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7884       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7885                                       disk[constants.IDISK_SIZE],
7886                                       [data_vg, meta_vg],
7887                                       names[idx * 2:idx * 2 + 2],
7888                                       "disk/%d" % disk_index,
7889                                       minors[idx * 2], minors[idx * 2 + 1])
7890       disk_dev.mode = disk[constants.IDISK_MODE]
7891       disks.append(disk_dev)
7892   elif template_name == constants.DT_FILE:
7893     if len(secondary_nodes) != 0:
7894       raise errors.ProgrammerError("Wrong template configuration")
7895
7896     opcodes.RequireFileStorage()
7897
7898     for idx, disk in enumerate(disk_info):
7899       disk_index = idx + base_index
7900       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7901                               size=disk[constants.IDISK_SIZE],
7902                               iv_name="disk/%d" % disk_index,
7903                               logical_id=(file_driver,
7904                                           "%s/disk%d" % (file_storage_dir,
7905                                                          disk_index)),
7906                               mode=disk[constants.IDISK_MODE])
7907       disks.append(disk_dev)
7908   elif template_name == constants.DT_SHARED_FILE:
7909     if len(secondary_nodes) != 0:
7910       raise errors.ProgrammerError("Wrong template configuration")
7911
7912     opcodes.RequireSharedFileStorage()
7913
7914     for idx, disk in enumerate(disk_info):
7915       disk_index = idx + base_index
7916       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7917                               size=disk[constants.IDISK_SIZE],
7918                               iv_name="disk/%d" % disk_index,
7919                               logical_id=(file_driver,
7920                                           "%s/disk%d" % (file_storage_dir,
7921                                                          disk_index)),
7922                               mode=disk[constants.IDISK_MODE])
7923       disks.append(disk_dev)
7924   elif template_name == constants.DT_BLOCK:
7925     if len(secondary_nodes) != 0:
7926       raise errors.ProgrammerError("Wrong template configuration")
7927
7928     for idx, disk in enumerate(disk_info):
7929       disk_index = idx + base_index
7930       disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7931                               size=disk[constants.IDISK_SIZE],
7932                               logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7933                                           disk[constants.IDISK_ADOPT]),
7934                               iv_name="disk/%d" % disk_index,
7935                               mode=disk[constants.IDISK_MODE])
7936       disks.append(disk_dev)
7937
7938   else:
7939     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7940   return disks
7941
7942
7943 def _GetInstanceInfoText(instance):
7944   """Compute that text that should be added to the disk's metadata.
7945
7946   """
7947   return "originstname+%s" % instance.name
7948
7949
7950 def _CalcEta(time_taken, written, total_size):
7951   """Calculates the ETA based on size written and total size.
7952
7953   @param time_taken: The time taken so far
7954   @param written: amount written so far
7955   @param total_size: The total size of data to be written
7956   @return: The remaining time in seconds
7957
7958   """
7959   avg_time = time_taken / float(written)
7960   return (total_size - written) * avg_time
7961
7962
7963 def _WipeDisks(lu, instance):
7964   """Wipes instance disks.
7965
7966   @type lu: L{LogicalUnit}
7967   @param lu: the logical unit on whose behalf we execute
7968   @type instance: L{objects.Instance}
7969   @param instance: the instance whose disks we should create
7970   @return: the success of the wipe
7971
7972   """
7973   node = instance.primary_node
7974
7975   for device in instance.disks:
7976     lu.cfg.SetDiskID(device, node)
7977
7978   logging.info("Pause sync of instance %s disks", instance.name)
7979   result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7980
7981   for idx, success in enumerate(result.payload):
7982     if not success:
7983       logging.warn("pause-sync of instance %s for disks %d failed",
7984                    instance.name, idx)
7985
7986   try:
7987     for idx, device in enumerate(instance.disks):
7988       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7989       # MAX_WIPE_CHUNK at max
7990       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7991                             constants.MIN_WIPE_CHUNK_PERCENT)
7992       # we _must_ make this an int, otherwise rounding errors will
7993       # occur
7994       wipe_chunk_size = int(wipe_chunk_size)
7995
7996       lu.LogInfo("* Wiping disk %d", idx)
7997       logging.info("Wiping disk %d for instance %s, node %s using"
7998                    " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7999
8000       offset = 0
8001       size = device.size
8002       last_output = 0
8003       start_time = time.time()
8004
8005       while offset < size:
8006         wipe_size = min(wipe_chunk_size, size - offset)
8007         logging.debug("Wiping disk %d, offset %s, chunk %s",
8008                       idx, offset, wipe_size)
8009         result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8010         result.Raise("Could not wipe disk %d at offset %d for size %d" %
8011                      (idx, offset, wipe_size))
8012         now = time.time()
8013         offset += wipe_size
8014         if now - last_output >= 60:
8015           eta = _CalcEta(now - start_time, offset, size)
8016           lu.LogInfo(" - done: %.1f%% ETA: %s" %
8017                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
8018           last_output = now
8019   finally:
8020     logging.info("Resume sync of instance %s disks", instance.name)
8021
8022     result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8023
8024     for idx, success in enumerate(result.payload):
8025       if not success:
8026         lu.LogWarning("Resume sync of disk %d failed, please have a"
8027                       " look at the status and troubleshoot the issue", idx)
8028         logging.warn("resume-sync of instance %s for disks %d failed",
8029                      instance.name, idx)
8030
8031
8032 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8033   """Create all disks for an instance.
8034
8035   This abstracts away some work from AddInstance.
8036
8037   @type lu: L{LogicalUnit}
8038   @param lu: the logical unit on whose behalf we execute
8039   @type instance: L{objects.Instance}
8040   @param instance: the instance whose disks we should create
8041   @type to_skip: list
8042   @param to_skip: list of indices to skip
8043   @type target_node: string
8044   @param target_node: if passed, overrides the target node for creation
8045   @rtype: boolean
8046   @return: the success of the creation
8047
8048   """
8049   info = _GetInstanceInfoText(instance)
8050   if target_node is None:
8051     pnode = instance.primary_node
8052     all_nodes = instance.all_nodes
8053   else:
8054     pnode = target_node
8055     all_nodes = [pnode]
8056
8057   if instance.disk_template in constants.DTS_FILEBASED:
8058     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8059     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8060
8061     result.Raise("Failed to create directory '%s' on"
8062                  " node %s" % (file_storage_dir, pnode))
8063
8064   # Note: this needs to be kept in sync with adding of disks in
8065   # LUInstanceSetParams
8066   for idx, device in enumerate(instance.disks):
8067     if to_skip and idx in to_skip:
8068       continue
8069     logging.info("Creating volume %s for instance %s",
8070                  device.iv_name, instance.name)
8071     #HARDCODE
8072     for node in all_nodes:
8073       f_create = node == pnode
8074       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8075
8076
8077 def _RemoveDisks(lu, instance, target_node=None):
8078   """Remove all disks for an instance.
8079
8080   This abstracts away some work from `AddInstance()` and
8081   `RemoveInstance()`. Note that in case some of the devices couldn't
8082   be removed, the removal will continue with the other ones (compare
8083   with `_CreateDisks()`).
8084
8085   @type lu: L{LogicalUnit}
8086   @param lu: the logical unit on whose behalf we execute
8087   @type instance: L{objects.Instance}
8088   @param instance: the instance whose disks we should remove
8089   @type target_node: string
8090   @param target_node: used to override the node on which to remove the disks
8091   @rtype: boolean
8092   @return: the success of the removal
8093
8094   """
8095   logging.info("Removing block devices for instance %s", instance.name)
8096
8097   all_result = True
8098   for device in instance.disks:
8099     if target_node:
8100       edata = [(target_node, device)]
8101     else:
8102       edata = device.ComputeNodeTree(instance.primary_node)
8103     for node, disk in edata:
8104       lu.cfg.SetDiskID(disk, node)
8105       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8106       if msg:
8107         lu.LogWarning("Could not remove block device %s on node %s,"
8108                       " continuing anyway: %s", device.iv_name, node, msg)
8109         all_result = False
8110
8111   if instance.disk_template == constants.DT_FILE:
8112     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8113     if target_node:
8114       tgt = target_node
8115     else:
8116       tgt = instance.primary_node
8117     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8118     if result.fail_msg:
8119       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8120                     file_storage_dir, instance.primary_node, result.fail_msg)
8121       all_result = False
8122
8123   return all_result
8124
8125
8126 def _ComputeDiskSizePerVG(disk_template, disks):
8127   """Compute disk size requirements in the volume group
8128
8129   """
8130   def _compute(disks, payload):
8131     """Universal algorithm.
8132
8133     """
8134     vgs = {}
8135     for disk in disks:
8136       vgs[disk[constants.IDISK_VG]] = \
8137         vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
8138
8139     return vgs
8140
8141   # Required free disk space as a function of disk and swap space
8142   req_size_dict = {
8143     constants.DT_DISKLESS: {},
8144     constants.DT_PLAIN: _compute(disks, 0),
8145     # 128 MB are added for drbd metadata for each disk
8146     constants.DT_DRBD8: _compute(disks, 128),
8147     constants.DT_FILE: {},
8148     constants.DT_SHARED_FILE: {},
8149   }
8150
8151   if disk_template not in req_size_dict:
8152     raise errors.ProgrammerError("Disk template '%s' size requirement"
8153                                  " is unknown" % disk_template)
8154
8155   return req_size_dict[disk_template]
8156
8157
8158 def _ComputeDiskSize(disk_template, disks):
8159   """Compute disk size requirements in the volume group
8160
8161   """
8162   # Required free disk space as a function of disk and swap space
8163   req_size_dict = {
8164     constants.DT_DISKLESS: None,
8165     constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8166     # 128 MB are added for drbd metadata for each disk
8167     constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
8168     constants.DT_FILE: None,
8169     constants.DT_SHARED_FILE: 0,
8170     constants.DT_BLOCK: 0,
8171   }
8172
8173   if disk_template not in req_size_dict:
8174     raise errors.ProgrammerError("Disk template '%s' size requirement"
8175                                  " is unknown" % disk_template)
8176
8177   return req_size_dict[disk_template]
8178
8179
8180 def _FilterVmNodes(lu, nodenames):
8181   """Filters out non-vm_capable nodes from a list.
8182
8183   @type lu: L{LogicalUnit}
8184   @param lu: the logical unit for which we check
8185   @type nodenames: list
8186   @param nodenames: the list of nodes on which we should check
8187   @rtype: list
8188   @return: the list of vm-capable nodes
8189
8190   """
8191   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8192   return [name for name in nodenames if name not in vm_nodes]
8193
8194
8195 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8196   """Hypervisor parameter validation.
8197
8198   This function abstract the hypervisor parameter validation to be
8199   used in both instance create and instance modify.
8200
8201   @type lu: L{LogicalUnit}
8202   @param lu: the logical unit for which we check
8203   @type nodenames: list
8204   @param nodenames: the list of nodes on which we should check
8205   @type hvname: string
8206   @param hvname: the name of the hypervisor we should use
8207   @type hvparams: dict
8208   @param hvparams: the parameters which we need to check
8209   @raise errors.OpPrereqError: if the parameters are not valid
8210
8211   """
8212   nodenames = _FilterVmNodes(lu, nodenames)
8213   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8214                                                   hvname,
8215                                                   hvparams)
8216   for node in nodenames:
8217     info = hvinfo[node]
8218     if info.offline:
8219       continue
8220     info.Raise("Hypervisor parameter validation failed on node %s" % node)
8221
8222
8223 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8224   """OS parameters validation.
8225
8226   @type lu: L{LogicalUnit}
8227   @param lu: the logical unit for which we check
8228   @type required: boolean
8229   @param required: whether the validation should fail if the OS is not
8230       found
8231   @type nodenames: list
8232   @param nodenames: the list of nodes on which we should check
8233   @type osname: string
8234   @param osname: the name of the hypervisor we should use
8235   @type osparams: dict
8236   @param osparams: the parameters which we need to check
8237   @raise errors.OpPrereqError: if the parameters are not valid
8238
8239   """
8240   nodenames = _FilterVmNodes(lu, nodenames)
8241   result = lu.rpc.call_os_validate(required, nodenames, osname,
8242                                    [constants.OS_VALIDATE_PARAMETERS],
8243                                    osparams)
8244   for node, nres in result.items():
8245     # we don't check for offline cases since this should be run only
8246     # against the master node and/or an instance's nodes
8247     nres.Raise("OS Parameters validation failed on node %s" % node)
8248     if not nres.payload:
8249       lu.LogInfo("OS %s not found on node %s, validation skipped",
8250                  osname, node)
8251
8252
8253 class LUInstanceCreate(LogicalUnit):
8254   """Create an instance.
8255
8256   """
8257   HPATH = "instance-add"
8258   HTYPE = constants.HTYPE_INSTANCE
8259   REQ_BGL = False
8260
8261   def CheckArguments(self):
8262     """Check arguments.
8263
8264     """
8265     # do not require name_check to ease forward/backward compatibility
8266     # for tools
8267     if self.op.no_install and self.op.start:
8268       self.LogInfo("No-installation mode selected, disabling startup")
8269       self.op.start = False
8270     # validate/normalize the instance name
8271     self.op.instance_name = \
8272       netutils.Hostname.GetNormalizedName(self.op.instance_name)
8273
8274     if self.op.ip_check and not self.op.name_check:
8275       # TODO: make the ip check more flexible and not depend on the name check
8276       raise errors.OpPrereqError("Cannot do IP address check without a name"
8277                                  " check", errors.ECODE_INVAL)
8278
8279     # check nics' parameter names
8280     for nic in self.op.nics:
8281       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8282
8283     # check disks. parameter names and consistent adopt/no-adopt strategy
8284     has_adopt = has_no_adopt = False
8285     for disk in self.op.disks:
8286       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8287       if constants.IDISK_ADOPT in disk:
8288         has_adopt = True
8289       else:
8290         has_no_adopt = True
8291     if has_adopt and has_no_adopt:
8292       raise errors.OpPrereqError("Either all disks are adopted or none is",
8293                                  errors.ECODE_INVAL)
8294     if has_adopt:
8295       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8296         raise errors.OpPrereqError("Disk adoption is not supported for the"
8297                                    " '%s' disk template" %
8298                                    self.op.disk_template,
8299                                    errors.ECODE_INVAL)
8300       if self.op.iallocator is not None:
8301         raise errors.OpPrereqError("Disk adoption not allowed with an"
8302                                    " iallocator script", errors.ECODE_INVAL)
8303       if self.op.mode == constants.INSTANCE_IMPORT:
8304         raise errors.OpPrereqError("Disk adoption not allowed for"
8305                                    " instance import", errors.ECODE_INVAL)
8306     else:
8307       if self.op.disk_template in constants.DTS_MUST_ADOPT:
8308         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8309                                    " but no 'adopt' parameter given" %
8310                                    self.op.disk_template,
8311                                    errors.ECODE_INVAL)
8312
8313     self.adopt_disks = has_adopt
8314
8315     # instance name verification
8316     if self.op.name_check:
8317       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8318       self.op.instance_name = self.hostname1.name
8319       # used in CheckPrereq for ip ping check
8320       self.check_ip = self.hostname1.ip
8321     else:
8322       self.check_ip = None
8323
8324     # file storage checks
8325     if (self.op.file_driver and
8326         not self.op.file_driver in constants.FILE_DRIVER):
8327       raise errors.OpPrereqError("Invalid file driver name '%s'" %
8328                                  self.op.file_driver, errors.ECODE_INVAL)
8329
8330     if self.op.disk_template == constants.DT_FILE:
8331       opcodes.RequireFileStorage()
8332     elif self.op.disk_template == constants.DT_SHARED_FILE:
8333       opcodes.RequireSharedFileStorage()
8334
8335     ### Node/iallocator related checks
8336     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8337
8338     if self.op.pnode is not None:
8339       if self.op.disk_template in constants.DTS_INT_MIRROR:
8340         if self.op.snode is None:
8341           raise errors.OpPrereqError("The networked disk templates need"
8342                                      " a mirror node", errors.ECODE_INVAL)
8343       elif self.op.snode:
8344         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8345                         " template")
8346         self.op.snode = None
8347
8348     self._cds = _GetClusterDomainSecret()
8349
8350     if self.op.mode == constants.INSTANCE_IMPORT:
8351       # On import force_variant must be True, because if we forced it at
8352       # initial install, our only chance when importing it back is that it
8353       # works again!
8354       self.op.force_variant = True
8355
8356       if self.op.no_install:
8357         self.LogInfo("No-installation mode has no effect during import")
8358
8359     elif self.op.mode == constants.INSTANCE_CREATE:
8360       if self.op.os_type is None:
8361         raise errors.OpPrereqError("No guest OS specified",
8362                                    errors.ECODE_INVAL)
8363       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8364         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8365                                    " installation" % self.op.os_type,
8366                                    errors.ECODE_STATE)
8367       if self.op.disk_template is None:
8368         raise errors.OpPrereqError("No disk template specified",
8369                                    errors.ECODE_INVAL)
8370
8371     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8372       # Check handshake to ensure both clusters have the same domain secret
8373       src_handshake = self.op.source_handshake
8374       if not src_handshake:
8375         raise errors.OpPrereqError("Missing source handshake",
8376                                    errors.ECODE_INVAL)
8377
8378       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8379                                                            src_handshake)
8380       if errmsg:
8381         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8382                                    errors.ECODE_INVAL)
8383
8384       # Load and check source CA
8385       self.source_x509_ca_pem = self.op.source_x509_ca
8386       if not self.source_x509_ca_pem:
8387         raise errors.OpPrereqError("Missing source X509 CA",
8388                                    errors.ECODE_INVAL)
8389
8390       try:
8391         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8392                                                     self._cds)
8393       except OpenSSL.crypto.Error, err:
8394         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8395                                    (err, ), errors.ECODE_INVAL)
8396
8397       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8398       if errcode is not None:
8399         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8400                                    errors.ECODE_INVAL)
8401
8402       self.source_x509_ca = cert
8403
8404       src_instance_name = self.op.source_instance_name
8405       if not src_instance_name:
8406         raise errors.OpPrereqError("Missing source instance name",
8407                                    errors.ECODE_INVAL)
8408
8409       self.source_instance_name = \
8410           netutils.GetHostname(name=src_instance_name).name
8411
8412     else:
8413       raise errors.OpPrereqError("Invalid instance creation mode %r" %
8414                                  self.op.mode, errors.ECODE_INVAL)
8415
8416   def ExpandNames(self):
8417     """ExpandNames for CreateInstance.
8418
8419     Figure out the right locks for instance creation.
8420
8421     """
8422     self.needed_locks = {}
8423
8424     instance_name = self.op.instance_name
8425     # this is just a preventive check, but someone might still add this
8426     # instance in the meantime, and creation will fail at lock-add time
8427     if instance_name in self.cfg.GetInstanceList():
8428       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8429                                  instance_name, errors.ECODE_EXISTS)
8430
8431     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8432
8433     if self.op.iallocator:
8434       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8435     else:
8436       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8437       nodelist = [self.op.pnode]
8438       if self.op.snode is not None:
8439         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8440         nodelist.append(self.op.snode)
8441       self.needed_locks[locking.LEVEL_NODE] = nodelist
8442
8443     # in case of import lock the source node too
8444     if self.op.mode == constants.INSTANCE_IMPORT:
8445       src_node = self.op.src_node
8446       src_path = self.op.src_path
8447
8448       if src_path is None:
8449         self.op.src_path = src_path = self.op.instance_name
8450
8451       if src_node is None:
8452         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8453         self.op.src_node = None
8454         if os.path.isabs(src_path):
8455           raise errors.OpPrereqError("Importing an instance from a path"
8456                                      " requires a source node option",
8457                                      errors.ECODE_INVAL)
8458       else:
8459         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8460         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8461           self.needed_locks[locking.LEVEL_NODE].append(src_node)
8462         if not os.path.isabs(src_path):
8463           self.op.src_path = src_path = \
8464             utils.PathJoin(constants.EXPORT_DIR, src_path)
8465
8466   def _RunAllocator(self):
8467     """Run the allocator based on input opcode.
8468
8469     """
8470     nics = [n.ToDict() for n in self.nics]
8471     ial = IAllocator(self.cfg, self.rpc,
8472                      mode=constants.IALLOCATOR_MODE_ALLOC,
8473                      name=self.op.instance_name,
8474                      disk_template=self.op.disk_template,
8475                      tags=self.op.tags,
8476                      os=self.op.os_type,
8477                      vcpus=self.be_full[constants.BE_VCPUS],
8478                      memory=self.be_full[constants.BE_MEMORY],
8479                      disks=self.disks,
8480                      nics=nics,
8481                      hypervisor=self.op.hypervisor,
8482                      )
8483
8484     ial.Run(self.op.iallocator)
8485
8486     if not ial.success:
8487       raise errors.OpPrereqError("Can't compute nodes using"
8488                                  " iallocator '%s': %s" %
8489                                  (self.op.iallocator, ial.info),
8490                                  errors.ECODE_NORES)
8491     if len(ial.result) != ial.required_nodes:
8492       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8493                                  " of nodes (%s), required %s" %
8494                                  (self.op.iallocator, len(ial.result),
8495                                   ial.required_nodes), errors.ECODE_FAULT)
8496     self.op.pnode = ial.result[0]
8497     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8498                  self.op.instance_name, self.op.iallocator,
8499                  utils.CommaJoin(ial.result))
8500     if ial.required_nodes == 2:
8501       self.op.snode = ial.result[1]
8502
8503   def BuildHooksEnv(self):
8504     """Build hooks env.
8505
8506     This runs on master, primary and secondary nodes of the instance.
8507
8508     """
8509     env = {
8510       "ADD_MODE": self.op.mode,
8511       }
8512     if self.op.mode == constants.INSTANCE_IMPORT:
8513       env["SRC_NODE"] = self.op.src_node
8514       env["SRC_PATH"] = self.op.src_path
8515       env["SRC_IMAGES"] = self.src_images
8516
8517     env.update(_BuildInstanceHookEnv(
8518       name=self.op.instance_name,
8519       primary_node=self.op.pnode,
8520       secondary_nodes=self.secondaries,
8521       status=self.op.start,
8522       os_type=self.op.os_type,
8523       memory=self.be_full[constants.BE_MEMORY],
8524       vcpus=self.be_full[constants.BE_VCPUS],
8525       nics=_NICListToTuple(self, self.nics),
8526       disk_template=self.op.disk_template,
8527       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8528              for d in self.disks],
8529       bep=self.be_full,
8530       hvp=self.hv_full,
8531       hypervisor_name=self.op.hypervisor,
8532       tags=self.op.tags,
8533     ))
8534
8535     return env
8536
8537   def BuildHooksNodes(self):
8538     """Build hooks nodes.
8539
8540     """
8541     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8542     return nl, nl
8543
8544   def _ReadExportInfo(self):
8545     """Reads the export information from disk.
8546
8547     It will override the opcode source node and path with the actual
8548     information, if these two were not specified before.
8549
8550     @return: the export information
8551
8552     """
8553     assert self.op.mode == constants.INSTANCE_IMPORT
8554
8555     src_node = self.op.src_node
8556     src_path = self.op.src_path
8557
8558     if src_node is None:
8559       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8560       exp_list = self.rpc.call_export_list(locked_nodes)
8561       found = False
8562       for node in exp_list:
8563         if exp_list[node].fail_msg:
8564           continue
8565         if src_path in exp_list[node].payload:
8566           found = True
8567           self.op.src_node = src_node = node
8568           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8569                                                        src_path)
8570           break
8571       if not found:
8572         raise errors.OpPrereqError("No export found for relative path %s" %
8573                                     src_path, errors.ECODE_INVAL)
8574
8575     _CheckNodeOnline(self, src_node)
8576     result = self.rpc.call_export_info(src_node, src_path)
8577     result.Raise("No export or invalid export found in dir %s" % src_path)
8578
8579     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8580     if not export_info.has_section(constants.INISECT_EXP):
8581       raise errors.ProgrammerError("Corrupted export config",
8582                                    errors.ECODE_ENVIRON)
8583
8584     ei_version = export_info.get(constants.INISECT_EXP, "version")
8585     if (int(ei_version) != constants.EXPORT_VERSION):
8586       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8587                                  (ei_version, constants.EXPORT_VERSION),
8588                                  errors.ECODE_ENVIRON)
8589     return export_info
8590
8591   def _ReadExportParams(self, einfo):
8592     """Use export parameters as defaults.
8593
8594     In case the opcode doesn't specify (as in override) some instance
8595     parameters, then try to use them from the export information, if
8596     that declares them.
8597
8598     """
8599     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8600
8601     if self.op.disk_template is None:
8602       if einfo.has_option(constants.INISECT_INS, "disk_template"):
8603         self.op.disk_template = einfo.get(constants.INISECT_INS,
8604                                           "disk_template")
8605         if self.op.disk_template not in constants.DISK_TEMPLATES:
8606           raise errors.OpPrereqError("Disk template specified in configuration"
8607                                      " file is not one of the allowed values:"
8608                                      " %s" % " ".join(constants.DISK_TEMPLATES))
8609       else:
8610         raise errors.OpPrereqError("No disk template specified and the export"
8611                                    " is missing the disk_template information",
8612                                    errors.ECODE_INVAL)
8613
8614     if not self.op.disks:
8615       disks = []
8616       # TODO: import the disk iv_name too
8617       for idx in range(constants.MAX_DISKS):
8618         if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8619           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8620           disks.append({constants.IDISK_SIZE: disk_sz})
8621       self.op.disks = disks
8622       if not disks and self.op.disk_template != constants.DT_DISKLESS:
8623         raise errors.OpPrereqError("No disk info specified and the export"
8624                                    " is missing the disk information",
8625                                    errors.ECODE_INVAL)
8626
8627     if not self.op.nics:
8628       nics = []
8629       for idx in range(constants.MAX_NICS):
8630         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8631           ndict = {}
8632           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8633             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8634             ndict[name] = v
8635           nics.append(ndict)
8636         else:
8637           break
8638       self.op.nics = nics
8639
8640     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8641       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8642
8643     if (self.op.hypervisor is None and
8644         einfo.has_option(constants.INISECT_INS, "hypervisor")):
8645       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8646
8647     if einfo.has_section(constants.INISECT_HYP):
8648       # use the export parameters but do not override the ones
8649       # specified by the user
8650       for name, value in einfo.items(constants.INISECT_HYP):
8651         if name not in self.op.hvparams:
8652           self.op.hvparams[name] = value
8653
8654     if einfo.has_section(constants.INISECT_BEP):
8655       # use the parameters, without overriding
8656       for name, value in einfo.items(constants.INISECT_BEP):
8657         if name not in self.op.beparams:
8658           self.op.beparams[name] = value
8659     else:
8660       # try to read the parameters old style, from the main section
8661       for name in constants.BES_PARAMETERS:
8662         if (name not in self.op.beparams and
8663             einfo.has_option(constants.INISECT_INS, name)):
8664           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8665
8666     if einfo.has_section(constants.INISECT_OSP):
8667       # use the parameters, without overriding
8668       for name, value in einfo.items(constants.INISECT_OSP):
8669         if name not in self.op.osparams:
8670           self.op.osparams[name] = value
8671
8672   def _RevertToDefaults(self, cluster):
8673     """Revert the instance parameters to the default values.
8674
8675     """
8676     # hvparams
8677     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8678     for name in self.op.hvparams.keys():
8679       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8680         del self.op.hvparams[name]
8681     # beparams
8682     be_defs = cluster.SimpleFillBE({})
8683     for name in self.op.beparams.keys():
8684       if name in be_defs and be_defs[name] == self.op.beparams[name]:
8685         del self.op.beparams[name]
8686     # nic params
8687     nic_defs = cluster.SimpleFillNIC({})
8688     for nic in self.op.nics:
8689       for name in constants.NICS_PARAMETERS:
8690         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8691           del nic[name]
8692     # osparams
8693     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8694     for name in self.op.osparams.keys():
8695       if name in os_defs and os_defs[name] == self.op.osparams[name]:
8696         del self.op.osparams[name]
8697
8698   def _CalculateFileStorageDir(self):
8699     """Calculate final instance file storage dir.
8700
8701     """
8702     # file storage dir calculation/check
8703     self.instance_file_storage_dir = None
8704     if self.op.disk_template in constants.DTS_FILEBASED:
8705       # build the full file storage dir path
8706       joinargs = []
8707
8708       if self.op.disk_template == constants.DT_SHARED_FILE:
8709         get_fsd_fn = self.cfg.GetSharedFileStorageDir
8710       else:
8711         get_fsd_fn = self.cfg.GetFileStorageDir
8712
8713       cfg_storagedir = get_fsd_fn()
8714       if not cfg_storagedir:
8715         raise errors.OpPrereqError("Cluster file storage dir not defined")
8716       joinargs.append(cfg_storagedir)
8717
8718       if self.op.file_storage_dir is not None:
8719         joinargs.append(self.op.file_storage_dir)
8720
8721       joinargs.append(self.op.instance_name)
8722
8723       # pylint: disable=W0142
8724       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8725
8726   def CheckPrereq(self):
8727     """Check prerequisites.
8728
8729     """
8730     self._CalculateFileStorageDir()
8731
8732     if self.op.mode == constants.INSTANCE_IMPORT:
8733       export_info = self._ReadExportInfo()
8734       self._ReadExportParams(export_info)
8735
8736     if (not self.cfg.GetVGName() and
8737         self.op.disk_template not in constants.DTS_NOT_LVM):
8738       raise errors.OpPrereqError("Cluster does not support lvm-based"
8739                                  " instances", errors.ECODE_STATE)
8740
8741     if (self.op.hypervisor is None or
8742         self.op.hypervisor == constants.VALUE_AUTO):
8743       self.op.hypervisor = self.cfg.GetHypervisorType()
8744
8745     cluster = self.cfg.GetClusterInfo()
8746     enabled_hvs = cluster.enabled_hypervisors
8747     if self.op.hypervisor not in enabled_hvs:
8748       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8749                                  " cluster (%s)" % (self.op.hypervisor,
8750                                   ",".join(enabled_hvs)),
8751                                  errors.ECODE_STATE)
8752
8753     # Check tag validity
8754     for tag in self.op.tags:
8755       objects.TaggableObject.ValidateTag(tag)
8756
8757     # check hypervisor parameter syntax (locally)
8758     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8759     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8760                                       self.op.hvparams)
8761     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8762     hv_type.CheckParameterSyntax(filled_hvp)
8763     self.hv_full = filled_hvp
8764     # check that we don't specify global parameters on an instance
8765     _CheckGlobalHvParams(self.op.hvparams)
8766
8767     # fill and remember the beparams dict
8768     default_beparams = cluster.beparams[constants.PP_DEFAULT]
8769     for param, value in self.op.beparams.iteritems():
8770       if value == constants.VALUE_AUTO:
8771         self.op.beparams[param] = default_beparams[param]
8772     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8773     self.be_full = cluster.SimpleFillBE(self.op.beparams)
8774
8775     # build os parameters
8776     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8777
8778     # now that hvp/bep are in final format, let's reset to defaults,
8779     # if told to do so
8780     if self.op.identify_defaults:
8781       self._RevertToDefaults(cluster)
8782
8783     # NIC buildup
8784     self.nics = []
8785     for idx, nic in enumerate(self.op.nics):
8786       nic_mode_req = nic.get(constants.INIC_MODE, None)
8787       nic_mode = nic_mode_req
8788       if nic_mode is None or nic_mode == constants.VALUE_AUTO:
8789         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8790
8791       # in routed mode, for the first nic, the default ip is 'auto'
8792       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8793         default_ip_mode = constants.VALUE_AUTO
8794       else:
8795         default_ip_mode = constants.VALUE_NONE
8796
8797       # ip validity checks
8798       ip = nic.get(constants.INIC_IP, default_ip_mode)
8799       if ip is None or ip.lower() == constants.VALUE_NONE:
8800         nic_ip = None
8801       elif ip.lower() == constants.VALUE_AUTO:
8802         if not self.op.name_check:
8803           raise errors.OpPrereqError("IP address set to auto but name checks"
8804                                      " have been skipped",
8805                                      errors.ECODE_INVAL)
8806         nic_ip = self.hostname1.ip
8807       else:
8808         if not netutils.IPAddress.IsValid(ip):
8809           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8810                                      errors.ECODE_INVAL)
8811         nic_ip = ip
8812
8813       # TODO: check the ip address for uniqueness
8814       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8815         raise errors.OpPrereqError("Routed nic mode requires an ip address",
8816                                    errors.ECODE_INVAL)
8817
8818       # MAC address verification
8819       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8820       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8821         mac = utils.NormalizeAndValidateMac(mac)
8822
8823         try:
8824           self.cfg.ReserveMAC(mac, self.proc.GetECId())
8825         except errors.ReservationError:
8826           raise errors.OpPrereqError("MAC address %s already in use"
8827                                      " in cluster" % mac,
8828                                      errors.ECODE_NOTUNIQUE)
8829
8830       #  Build nic parameters
8831       link = nic.get(constants.INIC_LINK, None)
8832       if link == constants.VALUE_AUTO:
8833         link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
8834       nicparams = {}
8835       if nic_mode_req:
8836         nicparams[constants.NIC_MODE] = nic_mode
8837       if link:
8838         nicparams[constants.NIC_LINK] = link
8839
8840       check_params = cluster.SimpleFillNIC(nicparams)
8841       objects.NIC.CheckParameterSyntax(check_params)
8842       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8843
8844     # disk checks/pre-build
8845     default_vg = self.cfg.GetVGName()
8846     self.disks = []
8847     for disk in self.op.disks:
8848       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8849       if mode not in constants.DISK_ACCESS_SET:
8850         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8851                                    mode, errors.ECODE_INVAL)
8852       size = disk.get(constants.IDISK_SIZE, None)
8853       if size is None:
8854         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8855       try:
8856         size = int(size)
8857       except (TypeError, ValueError):
8858         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8859                                    errors.ECODE_INVAL)
8860
8861       data_vg = disk.get(constants.IDISK_VG, default_vg)
8862       new_disk = {
8863         constants.IDISK_SIZE: size,
8864         constants.IDISK_MODE: mode,
8865         constants.IDISK_VG: data_vg,
8866         constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8867         }
8868       if constants.IDISK_ADOPT in disk:
8869         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8870       self.disks.append(new_disk)
8871
8872     if self.op.mode == constants.INSTANCE_IMPORT:
8873       disk_images = []
8874       for idx in range(len(self.disks)):
8875         option = "disk%d_dump" % idx
8876         if export_info.has_option(constants.INISECT_INS, option):
8877           # FIXME: are the old os-es, disk sizes, etc. useful?
8878           export_name = export_info.get(constants.INISECT_INS, option)
8879           image = utils.PathJoin(self.op.src_path, export_name)
8880           disk_images.append(image)
8881         else:
8882           disk_images.append(False)
8883
8884       self.src_images = disk_images
8885
8886       old_name = export_info.get(constants.INISECT_INS, "name")
8887       if self.op.instance_name == old_name:
8888         for idx, nic in enumerate(self.nics):
8889           if nic.mac == constants.VALUE_AUTO:
8890             nic_mac_ini = "nic%d_mac" % idx
8891             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8892
8893     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8894
8895     # ip ping checks (we use the same ip that was resolved in ExpandNames)
8896     if self.op.ip_check:
8897       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8898         raise errors.OpPrereqError("IP %s of instance %s already in use" %
8899                                    (self.check_ip, self.op.instance_name),
8900                                    errors.ECODE_NOTUNIQUE)
8901
8902     #### mac address generation
8903     # By generating here the mac address both the allocator and the hooks get
8904     # the real final mac address rather than the 'auto' or 'generate' value.
8905     # There is a race condition between the generation and the instance object
8906     # creation, which means that we know the mac is valid now, but we're not
8907     # sure it will be when we actually add the instance. If things go bad
8908     # adding the instance will abort because of a duplicate mac, and the
8909     # creation job will fail.
8910     for nic in self.nics:
8911       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8912         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8913
8914     #### allocator run
8915
8916     if self.op.iallocator is not None:
8917       self._RunAllocator()
8918
8919     #### node related checks
8920
8921     # check primary node
8922     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8923     assert self.pnode is not None, \
8924       "Cannot retrieve locked node %s" % self.op.pnode
8925     if pnode.offline:
8926       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8927                                  pnode.name, errors.ECODE_STATE)
8928     if pnode.drained:
8929       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8930                                  pnode.name, errors.ECODE_STATE)
8931     if not pnode.vm_capable:
8932       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8933                                  " '%s'" % pnode.name, errors.ECODE_STATE)
8934
8935     self.secondaries = []
8936
8937     # mirror node verification
8938     if self.op.disk_template in constants.DTS_INT_MIRROR:
8939       if self.op.snode == pnode.name:
8940         raise errors.OpPrereqError("The secondary node cannot be the"
8941                                    " primary node", errors.ECODE_INVAL)
8942       _CheckNodeOnline(self, self.op.snode)
8943       _CheckNodeNotDrained(self, self.op.snode)
8944       _CheckNodeVmCapable(self, self.op.snode)
8945       self.secondaries.append(self.op.snode)
8946
8947     nodenames = [pnode.name] + self.secondaries
8948
8949     if not self.adopt_disks:
8950       # Check lv size requirements, if not adopting
8951       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8952       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8953
8954     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8955       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8956                                 disk[constants.IDISK_ADOPT])
8957                      for disk in self.disks])
8958       if len(all_lvs) != len(self.disks):
8959         raise errors.OpPrereqError("Duplicate volume names given for adoption",
8960                                    errors.ECODE_INVAL)
8961       for lv_name in all_lvs:
8962         try:
8963           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8964           # to ReserveLV uses the same syntax
8965           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8966         except errors.ReservationError:
8967           raise errors.OpPrereqError("LV named %s used by another instance" %
8968                                      lv_name, errors.ECODE_NOTUNIQUE)
8969
8970       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8971       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8972
8973       node_lvs = self.rpc.call_lv_list([pnode.name],
8974                                        vg_names.payload.keys())[pnode.name]
8975       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8976       node_lvs = node_lvs.payload
8977
8978       delta = all_lvs.difference(node_lvs.keys())
8979       if delta:
8980         raise errors.OpPrereqError("Missing logical volume(s): %s" %
8981                                    utils.CommaJoin(delta),
8982                                    errors.ECODE_INVAL)
8983       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8984       if online_lvs:
8985         raise errors.OpPrereqError("Online logical volumes found, cannot"
8986                                    " adopt: %s" % utils.CommaJoin(online_lvs),
8987                                    errors.ECODE_STATE)
8988       # update the size of disk based on what is found
8989       for dsk in self.disks:
8990         dsk[constants.IDISK_SIZE] = \
8991           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8992                                         dsk[constants.IDISK_ADOPT])][0]))
8993
8994     elif self.op.disk_template == constants.DT_BLOCK:
8995       # Normalize and de-duplicate device paths
8996       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8997                        for disk in self.disks])
8998       if len(all_disks) != len(self.disks):
8999         raise errors.OpPrereqError("Duplicate disk names given for adoption",
9000                                    errors.ECODE_INVAL)
9001       baddisks = [d for d in all_disks
9002                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9003       if baddisks:
9004         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9005                                    " cannot be adopted" %
9006                                    (", ".join(baddisks),
9007                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
9008                                    errors.ECODE_INVAL)
9009
9010       node_disks = self.rpc.call_bdev_sizes([pnode.name],
9011                                             list(all_disks))[pnode.name]
9012       node_disks.Raise("Cannot get block device information from node %s" %
9013                        pnode.name)
9014       node_disks = node_disks.payload
9015       delta = all_disks.difference(node_disks.keys())
9016       if delta:
9017         raise errors.OpPrereqError("Missing block device(s): %s" %
9018                                    utils.CommaJoin(delta),
9019                                    errors.ECODE_INVAL)
9020       for dsk in self.disks:
9021         dsk[constants.IDISK_SIZE] = \
9022           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9023
9024     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9025
9026     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9027     # check OS parameters (remotely)
9028     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9029
9030     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9031
9032     # memory check on primary node
9033     if self.op.start:
9034       _CheckNodeFreeMemory(self, self.pnode.name,
9035                            "creating instance %s" % self.op.instance_name,
9036                            self.be_full[constants.BE_MEMORY],
9037                            self.op.hypervisor)
9038
9039     self.dry_run_result = list(nodenames)
9040
9041   def Exec(self, feedback_fn):
9042     """Create and add the instance to the cluster.
9043
9044     """
9045     instance = self.op.instance_name
9046     pnode_name = self.pnode.name
9047
9048     ht_kind = self.op.hypervisor
9049     if ht_kind in constants.HTS_REQ_PORT:
9050       network_port = self.cfg.AllocatePort()
9051     else:
9052       network_port = None
9053
9054     disks = _GenerateDiskTemplate(self,
9055                                   self.op.disk_template,
9056                                   instance, pnode_name,
9057                                   self.secondaries,
9058                                   self.disks,
9059                                   self.instance_file_storage_dir,
9060                                   self.op.file_driver,
9061                                   0,
9062                                   feedback_fn)
9063
9064     iobj = objects.Instance(name=instance, os=self.op.os_type,
9065                             primary_node=pnode_name,
9066                             nics=self.nics, disks=disks,
9067                             disk_template=self.op.disk_template,
9068                             admin_up=False,
9069                             network_port=network_port,
9070                             beparams=self.op.beparams,
9071                             hvparams=self.op.hvparams,
9072                             hypervisor=self.op.hypervisor,
9073                             osparams=self.op.osparams,
9074                             )
9075
9076     if self.op.tags:
9077       for tag in self.op.tags:
9078         iobj.AddTag(tag)
9079
9080     if self.adopt_disks:
9081       if self.op.disk_template == constants.DT_PLAIN:
9082         # rename LVs to the newly-generated names; we need to construct
9083         # 'fake' LV disks with the old data, plus the new unique_id
9084         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9085         rename_to = []
9086         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9087           rename_to.append(t_dsk.logical_id)
9088           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9089           self.cfg.SetDiskID(t_dsk, pnode_name)
9090         result = self.rpc.call_blockdev_rename(pnode_name,
9091                                                zip(tmp_disks, rename_to))
9092         result.Raise("Failed to rename adoped LVs")
9093     else:
9094       feedback_fn("* creating instance disks...")
9095       try:
9096         _CreateDisks(self, iobj)
9097       except errors.OpExecError:
9098         self.LogWarning("Device creation failed, reverting...")
9099         try:
9100           _RemoveDisks(self, iobj)
9101         finally:
9102           self.cfg.ReleaseDRBDMinors(instance)
9103           raise
9104
9105     feedback_fn("adding instance %s to cluster config" % instance)
9106
9107     self.cfg.AddInstance(iobj, self.proc.GetECId())
9108
9109     # Declare that we don't want to remove the instance lock anymore, as we've
9110     # added the instance to the config
9111     del self.remove_locks[locking.LEVEL_INSTANCE]
9112
9113     if self.op.mode == constants.INSTANCE_IMPORT:
9114       # Release unused nodes
9115       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9116     else:
9117       # Release all nodes
9118       _ReleaseLocks(self, locking.LEVEL_NODE)
9119
9120     disk_abort = False
9121     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9122       feedback_fn("* wiping instance disks...")
9123       try:
9124         _WipeDisks(self, iobj)
9125       except errors.OpExecError, err:
9126         logging.exception("Wiping disks failed")
9127         self.LogWarning("Wiping instance disks failed (%s)", err)
9128         disk_abort = True
9129
9130     if disk_abort:
9131       # Something is already wrong with the disks, don't do anything else
9132       pass
9133     elif self.op.wait_for_sync:
9134       disk_abort = not _WaitForSync(self, iobj)
9135     elif iobj.disk_template in constants.DTS_INT_MIRROR:
9136       # make sure the disks are not degraded (still sync-ing is ok)
9137       feedback_fn("* checking mirrors status")
9138       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9139     else:
9140       disk_abort = False
9141
9142     if disk_abort:
9143       _RemoveDisks(self, iobj)
9144       self.cfg.RemoveInstance(iobj.name)
9145       # Make sure the instance lock gets removed
9146       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9147       raise errors.OpExecError("There are some degraded disks for"
9148                                " this instance")
9149
9150     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9151       if self.op.mode == constants.INSTANCE_CREATE:
9152         if not self.op.no_install:
9153           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9154                         not self.op.wait_for_sync)
9155           if pause_sync:
9156             feedback_fn("* pausing disk sync to install instance OS")
9157             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9158                                                               iobj.disks, True)
9159             for idx, success in enumerate(result.payload):
9160               if not success:
9161                 logging.warn("pause-sync of instance %s for disk %d failed",
9162                              instance, idx)
9163
9164           feedback_fn("* running the instance OS create scripts...")
9165           # FIXME: pass debug option from opcode to backend
9166           os_add_result = \
9167             self.rpc.call_instance_os_add(pnode_name, iobj, False,
9168                                           self.op.debug_level)
9169           if pause_sync:
9170             feedback_fn("* resuming disk sync")
9171             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9172                                                               iobj.disks, False)
9173             for idx, success in enumerate(result.payload):
9174               if not success:
9175                 logging.warn("resume-sync of instance %s for disk %d failed",
9176                              instance, idx)
9177
9178           os_add_result.Raise("Could not add os for instance %s"
9179                               " on node %s" % (instance, pnode_name))
9180
9181       elif self.op.mode == constants.INSTANCE_IMPORT:
9182         feedback_fn("* running the instance OS import scripts...")
9183
9184         transfers = []
9185
9186         for idx, image in enumerate(self.src_images):
9187           if not image:
9188             continue
9189
9190           # FIXME: pass debug option from opcode to backend
9191           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9192                                              constants.IEIO_FILE, (image, ),
9193                                              constants.IEIO_SCRIPT,
9194                                              (iobj.disks[idx], idx),
9195                                              None)
9196           transfers.append(dt)
9197
9198         import_result = \
9199           masterd.instance.TransferInstanceData(self, feedback_fn,
9200                                                 self.op.src_node, pnode_name,
9201                                                 self.pnode.secondary_ip,
9202                                                 iobj, transfers)
9203         if not compat.all(import_result):
9204           self.LogWarning("Some disks for instance %s on node %s were not"
9205                           " imported successfully" % (instance, pnode_name))
9206
9207       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9208         feedback_fn("* preparing remote import...")
9209         # The source cluster will stop the instance before attempting to make a
9210         # connection. In some cases stopping an instance can take a long time,
9211         # hence the shutdown timeout is added to the connection timeout.
9212         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9213                            self.op.source_shutdown_timeout)
9214         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9215
9216         assert iobj.primary_node == self.pnode.name
9217         disk_results = \
9218           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9219                                         self.source_x509_ca,
9220                                         self._cds, timeouts)
9221         if not compat.all(disk_results):
9222           # TODO: Should the instance still be started, even if some disks
9223           # failed to import (valid for local imports, too)?
9224           self.LogWarning("Some disks for instance %s on node %s were not"
9225                           " imported successfully" % (instance, pnode_name))
9226
9227         # Run rename script on newly imported instance
9228         assert iobj.name == instance
9229         feedback_fn("Running rename script for %s" % instance)
9230         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9231                                                    self.source_instance_name,
9232                                                    self.op.debug_level)
9233         if result.fail_msg:
9234           self.LogWarning("Failed to run rename script for %s on node"
9235                           " %s: %s" % (instance, pnode_name, result.fail_msg))
9236
9237       else:
9238         # also checked in the prereq part
9239         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9240                                      % self.op.mode)
9241
9242     if self.op.start:
9243       iobj.admin_up = True
9244       self.cfg.Update(iobj, feedback_fn)
9245       logging.info("Starting instance %s on node %s", instance, pnode_name)
9246       feedback_fn("* starting instance...")
9247       result = self.rpc.call_instance_start(pnode_name, iobj,
9248                                             None, None, False)
9249       result.Raise("Could not start instance")
9250
9251     return list(iobj.all_nodes)
9252
9253
9254 class LUInstanceConsole(NoHooksLU):
9255   """Connect to an instance's console.
9256
9257   This is somewhat special in that it returns the command line that
9258   you need to run on the master node in order to connect to the
9259   console.
9260
9261   """
9262   REQ_BGL = False
9263
9264   def ExpandNames(self):
9265     self._ExpandAndLockInstance()
9266
9267   def CheckPrereq(self):
9268     """Check prerequisites.
9269
9270     This checks that the instance is in the cluster.
9271
9272     """
9273     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9274     assert self.instance is not None, \
9275       "Cannot retrieve locked instance %s" % self.op.instance_name
9276     _CheckNodeOnline(self, self.instance.primary_node)
9277
9278   def Exec(self, feedback_fn):
9279     """Connect to the console of an instance
9280
9281     """
9282     instance = self.instance
9283     node = instance.primary_node
9284
9285     node_insts = self.rpc.call_instance_list([node],
9286                                              [instance.hypervisor])[node]
9287     node_insts.Raise("Can't get node information from %s" % node)
9288
9289     if instance.name not in node_insts.payload:
9290       if instance.admin_up:
9291         state = constants.INSTST_ERRORDOWN
9292       else:
9293         state = constants.INSTST_ADMINDOWN
9294       raise errors.OpExecError("Instance %s is not running (state %s)" %
9295                                (instance.name, state))
9296
9297     logging.debug("Connecting to console of %s on %s", instance.name, node)
9298
9299     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9300
9301
9302 def _GetInstanceConsole(cluster, instance):
9303   """Returns console information for an instance.
9304
9305   @type cluster: L{objects.Cluster}
9306   @type instance: L{objects.Instance}
9307   @rtype: dict
9308
9309   """
9310   hyper = hypervisor.GetHypervisor(instance.hypervisor)
9311   # beparams and hvparams are passed separately, to avoid editing the
9312   # instance and then saving the defaults in the instance itself.
9313   hvparams = cluster.FillHV(instance)
9314   beparams = cluster.FillBE(instance)
9315   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9316
9317   assert console.instance == instance.name
9318   assert console.Validate()
9319
9320   return console.ToDict()
9321
9322
9323 class LUInstanceReplaceDisks(LogicalUnit):
9324   """Replace the disks of an instance.
9325
9326   """
9327   HPATH = "mirrors-replace"
9328   HTYPE = constants.HTYPE_INSTANCE
9329   REQ_BGL = False
9330
9331   def CheckArguments(self):
9332     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9333                                   self.op.iallocator)
9334
9335   def ExpandNames(self):
9336     self._ExpandAndLockInstance()
9337
9338     assert locking.LEVEL_NODE not in self.needed_locks
9339     assert locking.LEVEL_NODEGROUP not in self.needed_locks
9340
9341     assert self.op.iallocator is None or self.op.remote_node is None, \
9342       "Conflicting options"
9343
9344     if self.op.remote_node is not None:
9345       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9346
9347       # Warning: do not remove the locking of the new secondary here
9348       # unless DRBD8.AddChildren is changed to work in parallel;
9349       # currently it doesn't since parallel invocations of
9350       # FindUnusedMinor will conflict
9351       self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9352       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9353     else:
9354       self.needed_locks[locking.LEVEL_NODE] = []
9355       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9356
9357       if self.op.iallocator is not None:
9358         # iallocator will select a new node in the same group
9359         self.needed_locks[locking.LEVEL_NODEGROUP] = []
9360
9361     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9362                                    self.op.iallocator, self.op.remote_node,
9363                                    self.op.disks, False, self.op.early_release)
9364
9365     self.tasklets = [self.replacer]
9366
9367   def DeclareLocks(self, level):
9368     if level == locking.LEVEL_NODEGROUP:
9369       assert self.op.remote_node is None
9370       assert self.op.iallocator is not None
9371       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9372
9373       self.share_locks[locking.LEVEL_NODEGROUP] = 1
9374       self.needed_locks[locking.LEVEL_NODEGROUP] = \
9375         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9376
9377     elif level == locking.LEVEL_NODE:
9378       if self.op.iallocator is not None:
9379         assert self.op.remote_node is None
9380         assert not self.needed_locks[locking.LEVEL_NODE]
9381
9382         # Lock member nodes of all locked groups
9383         self.needed_locks[locking.LEVEL_NODE] = [node_name
9384           for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9385           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9386       else:
9387         self._LockInstancesNodes()
9388
9389   def BuildHooksEnv(self):
9390     """Build hooks env.
9391
9392     This runs on the master, the primary and all the secondaries.
9393
9394     """
9395     instance = self.replacer.instance
9396     env = {
9397       "MODE": self.op.mode,
9398       "NEW_SECONDARY": self.op.remote_node,
9399       "OLD_SECONDARY": instance.secondary_nodes[0],
9400       }
9401     env.update(_BuildInstanceHookEnvByObject(self, instance))
9402     return env
9403
9404   def BuildHooksNodes(self):
9405     """Build hooks nodes.
9406
9407     """
9408     instance = self.replacer.instance
9409     nl = [
9410       self.cfg.GetMasterNode(),
9411       instance.primary_node,
9412       ]
9413     if self.op.remote_node is not None:
9414       nl.append(self.op.remote_node)
9415     return nl, nl
9416
9417   def CheckPrereq(self):
9418     """Check prerequisites.
9419
9420     """
9421     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9422             self.op.iallocator is None)
9423
9424     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9425     if owned_groups:
9426       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9427
9428     return LogicalUnit.CheckPrereq(self)
9429
9430
9431 class TLReplaceDisks(Tasklet):
9432   """Replaces disks for an instance.
9433
9434   Note: Locking is not within the scope of this class.
9435
9436   """
9437   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9438                disks, delay_iallocator, early_release):
9439     """Initializes this class.
9440
9441     """
9442     Tasklet.__init__(self, lu)
9443
9444     # Parameters
9445     self.instance_name = instance_name
9446     self.mode = mode
9447     self.iallocator_name = iallocator_name
9448     self.remote_node = remote_node
9449     self.disks = disks
9450     self.delay_iallocator = delay_iallocator
9451     self.early_release = early_release
9452
9453     # Runtime data
9454     self.instance = None
9455     self.new_node = None
9456     self.target_node = None
9457     self.other_node = None
9458     self.remote_node_info = None
9459     self.node_secondary_ip = None
9460
9461   @staticmethod
9462   def CheckArguments(mode, remote_node, iallocator):
9463     """Helper function for users of this class.
9464
9465     """
9466     # check for valid parameter combination
9467     if mode == constants.REPLACE_DISK_CHG:
9468       if remote_node is None and iallocator is None:
9469         raise errors.OpPrereqError("When changing the secondary either an"
9470                                    " iallocator script must be used or the"
9471                                    " new node given", errors.ECODE_INVAL)
9472
9473       if remote_node is not None and iallocator is not None:
9474         raise errors.OpPrereqError("Give either the iallocator or the new"
9475                                    " secondary, not both", errors.ECODE_INVAL)
9476
9477     elif remote_node is not None or iallocator is not None:
9478       # Not replacing the secondary
9479       raise errors.OpPrereqError("The iallocator and new node options can"
9480                                  " only be used when changing the"
9481                                  " secondary node", errors.ECODE_INVAL)
9482
9483   @staticmethod
9484   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9485     """Compute a new secondary node using an IAllocator.
9486
9487     """
9488     ial = IAllocator(lu.cfg, lu.rpc,
9489                      mode=constants.IALLOCATOR_MODE_RELOC,
9490                      name=instance_name,
9491                      relocate_from=list(relocate_from))
9492
9493     ial.Run(iallocator_name)
9494
9495     if not ial.success:
9496       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9497                                  " %s" % (iallocator_name, ial.info),
9498                                  errors.ECODE_NORES)
9499
9500     if len(ial.result) != ial.required_nodes:
9501       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9502                                  " of nodes (%s), required %s" %
9503                                  (iallocator_name,
9504                                   len(ial.result), ial.required_nodes),
9505                                  errors.ECODE_FAULT)
9506
9507     remote_node_name = ial.result[0]
9508
9509     lu.LogInfo("Selected new secondary for instance '%s': %s",
9510                instance_name, remote_node_name)
9511
9512     return remote_node_name
9513
9514   def _FindFaultyDisks(self, node_name):
9515     """Wrapper for L{_FindFaultyInstanceDisks}.
9516
9517     """
9518     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9519                                     node_name, True)
9520
9521   def _CheckDisksActivated(self, instance):
9522     """Checks if the instance disks are activated.
9523
9524     @param instance: The instance to check disks
9525     @return: True if they are activated, False otherwise
9526
9527     """
9528     nodes = instance.all_nodes
9529
9530     for idx, dev in enumerate(instance.disks):
9531       for node in nodes:
9532         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9533         self.cfg.SetDiskID(dev, node)
9534
9535         result = self.rpc.call_blockdev_find(node, dev)
9536
9537         if result.offline:
9538           continue
9539         elif result.fail_msg or not result.payload:
9540           return False
9541
9542     return True
9543
9544   def CheckPrereq(self):
9545     """Check prerequisites.
9546
9547     This checks that the instance is in the cluster.
9548
9549     """
9550     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9551     assert instance is not None, \
9552       "Cannot retrieve locked instance %s" % self.instance_name
9553
9554     if instance.disk_template != constants.DT_DRBD8:
9555       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9556                                  " instances", errors.ECODE_INVAL)
9557
9558     if len(instance.secondary_nodes) != 1:
9559       raise errors.OpPrereqError("The instance has a strange layout,"
9560                                  " expected one secondary but found %d" %
9561                                  len(instance.secondary_nodes),
9562                                  errors.ECODE_FAULT)
9563
9564     if not self.delay_iallocator:
9565       self._CheckPrereq2()
9566
9567   def _CheckPrereq2(self):
9568     """Check prerequisites, second part.
9569
9570     This function should always be part of CheckPrereq. It was separated and is
9571     now called from Exec because during node evacuation iallocator was only
9572     called with an unmodified cluster model, not taking planned changes into
9573     account.
9574
9575     """
9576     instance = self.instance
9577     secondary_node = instance.secondary_nodes[0]
9578
9579     if self.iallocator_name is None:
9580       remote_node = self.remote_node
9581     else:
9582       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9583                                        instance.name, instance.secondary_nodes)
9584
9585     if remote_node is None:
9586       self.remote_node_info = None
9587     else:
9588       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9589              "Remote node '%s' is not locked" % remote_node
9590
9591       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9592       assert self.remote_node_info is not None, \
9593         "Cannot retrieve locked node %s" % remote_node
9594
9595     if remote_node == self.instance.primary_node:
9596       raise errors.OpPrereqError("The specified node is the primary node of"
9597                                  " the instance", errors.ECODE_INVAL)
9598
9599     if remote_node == secondary_node:
9600       raise errors.OpPrereqError("The specified node is already the"
9601                                  " secondary node of the instance",
9602                                  errors.ECODE_INVAL)
9603
9604     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9605                                     constants.REPLACE_DISK_CHG):
9606       raise errors.OpPrereqError("Cannot specify disks to be replaced",
9607                                  errors.ECODE_INVAL)
9608
9609     if self.mode == constants.REPLACE_DISK_AUTO:
9610       if not self._CheckDisksActivated(instance):
9611         raise errors.OpPrereqError("Please run activate-disks on instance %s"
9612                                    " first" % self.instance_name,
9613                                    errors.ECODE_STATE)
9614       faulty_primary = self._FindFaultyDisks(instance.primary_node)
9615       faulty_secondary = self._FindFaultyDisks(secondary_node)
9616
9617       if faulty_primary and faulty_secondary:
9618         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9619                                    " one node and can not be repaired"
9620                                    " automatically" % self.instance_name,
9621                                    errors.ECODE_STATE)
9622
9623       if faulty_primary:
9624         self.disks = faulty_primary
9625         self.target_node = instance.primary_node
9626         self.other_node = secondary_node
9627         check_nodes = [self.target_node, self.other_node]
9628       elif faulty_secondary:
9629         self.disks = faulty_secondary
9630         self.target_node = secondary_node
9631         self.other_node = instance.primary_node
9632         check_nodes = [self.target_node, self.other_node]
9633       else:
9634         self.disks = []
9635         check_nodes = []
9636
9637     else:
9638       # Non-automatic modes
9639       if self.mode == constants.REPLACE_DISK_PRI:
9640         self.target_node = instance.primary_node
9641         self.other_node = secondary_node
9642         check_nodes = [self.target_node, self.other_node]
9643
9644       elif self.mode == constants.REPLACE_DISK_SEC:
9645         self.target_node = secondary_node
9646         self.other_node = instance.primary_node
9647         check_nodes = [self.target_node, self.other_node]
9648
9649       elif self.mode == constants.REPLACE_DISK_CHG:
9650         self.new_node = remote_node
9651         self.other_node = instance.primary_node
9652         self.target_node = secondary_node
9653         check_nodes = [self.new_node, self.other_node]
9654
9655         _CheckNodeNotDrained(self.lu, remote_node)
9656         _CheckNodeVmCapable(self.lu, remote_node)
9657
9658         old_node_info = self.cfg.GetNodeInfo(secondary_node)
9659         assert old_node_info is not None
9660         if old_node_info.offline and not self.early_release:
9661           # doesn't make sense to delay the release
9662           self.early_release = True
9663           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9664                           " early-release mode", secondary_node)
9665
9666       else:
9667         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9668                                      self.mode)
9669
9670       # If not specified all disks should be replaced
9671       if not self.disks:
9672         self.disks = range(len(self.instance.disks))
9673
9674     for node in check_nodes:
9675       _CheckNodeOnline(self.lu, node)
9676
9677     touched_nodes = frozenset(node_name for node_name in [self.new_node,
9678                                                           self.other_node,
9679                                                           self.target_node]
9680                               if node_name is not None)
9681
9682     # Release unneeded node locks
9683     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9684
9685     # Release any owned node group
9686     if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9687       _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9688
9689     # Check whether disks are valid
9690     for disk_idx in self.disks:
9691       instance.FindDisk(disk_idx)
9692
9693     # Get secondary node IP addresses
9694     self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9695                                   in self.cfg.GetMultiNodeInfo(touched_nodes))
9696
9697   def Exec(self, feedback_fn):
9698     """Execute disk replacement.
9699
9700     This dispatches the disk replacement to the appropriate handler.
9701
9702     """
9703     if self.delay_iallocator:
9704       self._CheckPrereq2()
9705
9706     if __debug__:
9707       # Verify owned locks before starting operation
9708       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9709       assert set(owned_nodes) == set(self.node_secondary_ip), \
9710           ("Incorrect node locks, owning %s, expected %s" %
9711            (owned_nodes, self.node_secondary_ip.keys()))
9712
9713       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9714       assert list(owned_instances) == [self.instance_name], \
9715           "Instance '%s' not locked" % self.instance_name
9716
9717       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9718           "Should not own any node group lock at this point"
9719
9720     if not self.disks:
9721       feedback_fn("No disks need replacement")
9722       return
9723
9724     feedback_fn("Replacing disk(s) %s for %s" %
9725                 (utils.CommaJoin(self.disks), self.instance.name))
9726
9727     activate_disks = (not self.instance.admin_up)
9728
9729     # Activate the instance disks if we're replacing them on a down instance
9730     if activate_disks:
9731       _StartInstanceDisks(self.lu, self.instance, True)
9732
9733     try:
9734       # Should we replace the secondary node?
9735       if self.new_node is not None:
9736         fn = self._ExecDrbd8Secondary
9737       else:
9738         fn = self._ExecDrbd8DiskOnly
9739
9740       result = fn(feedback_fn)
9741     finally:
9742       # Deactivate the instance disks if we're replacing them on a
9743       # down instance
9744       if activate_disks:
9745         _SafeShutdownInstanceDisks(self.lu, self.instance)
9746
9747     if __debug__:
9748       # Verify owned locks
9749       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9750       nodes = frozenset(self.node_secondary_ip)
9751       assert ((self.early_release and not owned_nodes) or
9752               (not self.early_release and not (set(owned_nodes) - nodes))), \
9753         ("Not owning the correct locks, early_release=%s, owned=%r,"
9754          " nodes=%r" % (self.early_release, owned_nodes, nodes))
9755
9756     return result
9757
9758   def _CheckVolumeGroup(self, nodes):
9759     self.lu.LogInfo("Checking volume groups")
9760
9761     vgname = self.cfg.GetVGName()
9762
9763     # Make sure volume group exists on all involved nodes
9764     results = self.rpc.call_vg_list(nodes)
9765     if not results:
9766       raise errors.OpExecError("Can't list volume groups on the nodes")
9767
9768     for node in nodes:
9769       res = results[node]
9770       res.Raise("Error checking node %s" % node)
9771       if vgname not in res.payload:
9772         raise errors.OpExecError("Volume group '%s' not found on node %s" %
9773                                  (vgname, node))
9774
9775   def _CheckDisksExistence(self, nodes):
9776     # Check disk existence
9777     for idx, dev in enumerate(self.instance.disks):
9778       if idx not in self.disks:
9779         continue
9780
9781       for node in nodes:
9782         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9783         self.cfg.SetDiskID(dev, node)
9784
9785         result = self.rpc.call_blockdev_find(node, dev)
9786
9787         msg = result.fail_msg
9788         if msg or not result.payload:
9789           if not msg:
9790             msg = "disk not found"
9791           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9792                                    (idx, node, msg))
9793
9794   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9795     for idx, dev in enumerate(self.instance.disks):
9796       if idx not in self.disks:
9797         continue
9798
9799       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9800                       (idx, node_name))
9801
9802       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9803                                    ldisk=ldisk):
9804         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9805                                  " replace disks for instance %s" %
9806                                  (node_name, self.instance.name))
9807
9808   def _CreateNewStorage(self, node_name):
9809     """Create new storage on the primary or secondary node.
9810
9811     This is only used for same-node replaces, not for changing the
9812     secondary node, hence we don't want to modify the existing disk.
9813
9814     """
9815     iv_names = {}
9816
9817     for idx, dev in enumerate(self.instance.disks):
9818       if idx not in self.disks:
9819         continue
9820
9821       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9822
9823       self.cfg.SetDiskID(dev, node_name)
9824
9825       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9826       names = _GenerateUniqueNames(self.lu, lv_names)
9827
9828       vg_data = dev.children[0].logical_id[0]
9829       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9830                              logical_id=(vg_data, names[0]))
9831       vg_meta = dev.children[1].logical_id[0]
9832       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9833                              logical_id=(vg_meta, names[1]))
9834
9835       new_lvs = [lv_data, lv_meta]
9836       old_lvs = [child.Copy() for child in dev.children]
9837       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9838
9839       # we pass force_create=True to force the LVM creation
9840       for new_lv in new_lvs:
9841         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9842                         _GetInstanceInfoText(self.instance), False)
9843
9844     return iv_names
9845
9846   def _CheckDevices(self, node_name, iv_names):
9847     for name, (dev, _, _) in iv_names.iteritems():
9848       self.cfg.SetDiskID(dev, node_name)
9849
9850       result = self.rpc.call_blockdev_find(node_name, dev)
9851
9852       msg = result.fail_msg
9853       if msg or not result.payload:
9854         if not msg:
9855           msg = "disk not found"
9856         raise errors.OpExecError("Can't find DRBD device %s: %s" %
9857                                  (name, msg))
9858
9859       if result.payload.is_degraded:
9860         raise errors.OpExecError("DRBD device %s is degraded!" % name)
9861
9862   def _RemoveOldStorage(self, node_name, iv_names):
9863     for name, (_, old_lvs, _) in iv_names.iteritems():
9864       self.lu.LogInfo("Remove logical volumes for %s" % name)
9865
9866       for lv in old_lvs:
9867         self.cfg.SetDiskID(lv, node_name)
9868
9869         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9870         if msg:
9871           self.lu.LogWarning("Can't remove old LV: %s" % msg,
9872                              hint="remove unused LVs manually")
9873
9874   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9875     """Replace a disk on the primary or secondary for DRBD 8.
9876
9877     The algorithm for replace is quite complicated:
9878
9879       1. for each disk to be replaced:
9880
9881         1. create new LVs on the target node with unique names
9882         1. detach old LVs from the drbd device
9883         1. rename old LVs to name_replaced.<time_t>
9884         1. rename new LVs to old LVs
9885         1. attach the new LVs (with the old names now) to the drbd device
9886
9887       1. wait for sync across all devices
9888
9889       1. for each modified disk:
9890
9891         1. remove old LVs (which have the name name_replaces.<time_t>)
9892
9893     Failures are not very well handled.
9894
9895     """
9896     steps_total = 6
9897
9898     # Step: check device activation
9899     self.lu.LogStep(1, steps_total, "Check device existence")
9900     self._CheckDisksExistence([self.other_node, self.target_node])
9901     self._CheckVolumeGroup([self.target_node, self.other_node])
9902
9903     # Step: check other node consistency
9904     self.lu.LogStep(2, steps_total, "Check peer consistency")
9905     self._CheckDisksConsistency(self.other_node,
9906                                 self.other_node == self.instance.primary_node,
9907                                 False)
9908
9909     # Step: create new storage
9910     self.lu.LogStep(3, steps_total, "Allocate new storage")
9911     iv_names = self._CreateNewStorage(self.target_node)
9912
9913     # Step: for each lv, detach+rename*2+attach
9914     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9915     for dev, old_lvs, new_lvs in iv_names.itervalues():
9916       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9917
9918       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9919                                                      old_lvs)
9920       result.Raise("Can't detach drbd from local storage on node"
9921                    " %s for device %s" % (self.target_node, dev.iv_name))
9922       #dev.children = []
9923       #cfg.Update(instance)
9924
9925       # ok, we created the new LVs, so now we know we have the needed
9926       # storage; as such, we proceed on the target node to rename
9927       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9928       # using the assumption that logical_id == physical_id (which in
9929       # turn is the unique_id on that node)
9930
9931       # FIXME(iustin): use a better name for the replaced LVs
9932       temp_suffix = int(time.time())
9933       ren_fn = lambda d, suff: (d.physical_id[0],
9934                                 d.physical_id[1] + "_replaced-%s" % suff)
9935
9936       # Build the rename list based on what LVs exist on the node
9937       rename_old_to_new = []
9938       for to_ren in old_lvs:
9939         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9940         if not result.fail_msg and result.payload:
9941           # device exists
9942           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9943
9944       self.lu.LogInfo("Renaming the old LVs on the target node")
9945       result = self.rpc.call_blockdev_rename(self.target_node,
9946                                              rename_old_to_new)
9947       result.Raise("Can't rename old LVs on node %s" % self.target_node)
9948
9949       # Now we rename the new LVs to the old LVs
9950       self.lu.LogInfo("Renaming the new LVs on the target node")
9951       rename_new_to_old = [(new, old.physical_id)
9952                            for old, new in zip(old_lvs, new_lvs)]
9953       result = self.rpc.call_blockdev_rename(self.target_node,
9954                                              rename_new_to_old)
9955       result.Raise("Can't rename new LVs on node %s" % self.target_node)
9956
9957       # Intermediate steps of in memory modifications
9958       for old, new in zip(old_lvs, new_lvs):
9959         new.logical_id = old.logical_id
9960         self.cfg.SetDiskID(new, self.target_node)
9961
9962       # We need to modify old_lvs so that removal later removes the
9963       # right LVs, not the newly added ones; note that old_lvs is a
9964       # copy here
9965       for disk in old_lvs:
9966         disk.logical_id = ren_fn(disk, temp_suffix)
9967         self.cfg.SetDiskID(disk, self.target_node)
9968
9969       # Now that the new lvs have the old name, we can add them to the device
9970       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9971       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9972                                                   new_lvs)
9973       msg = result.fail_msg
9974       if msg:
9975         for new_lv in new_lvs:
9976           msg2 = self.rpc.call_blockdev_remove(self.target_node,
9977                                                new_lv).fail_msg
9978           if msg2:
9979             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9980                                hint=("cleanup manually the unused logical"
9981                                      "volumes"))
9982         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9983
9984     cstep = 5
9985     if self.early_release:
9986       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9987       cstep += 1
9988       self._RemoveOldStorage(self.target_node, iv_names)
9989       # WARNING: we release both node locks here, do not do other RPCs
9990       # than WaitForSync to the primary node
9991       _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9992                     names=[self.target_node, self.other_node])
9993
9994     # Wait for sync
9995     # This can fail as the old devices are degraded and _WaitForSync
9996     # does a combined result over all disks, so we don't check its return value
9997     self.lu.LogStep(cstep, steps_total, "Sync devices")
9998     cstep += 1
9999     _WaitForSync(self.lu, self.instance)
10000
10001     # Check all devices manually
10002     self._CheckDevices(self.instance.primary_node, iv_names)
10003
10004     # Step: remove old storage
10005     if not self.early_release:
10006       self.lu.LogStep(cstep, steps_total, "Removing old storage")
10007       cstep += 1
10008       self._RemoveOldStorage(self.target_node, iv_names)
10009
10010   def _ExecDrbd8Secondary(self, feedback_fn):
10011     """Replace the secondary node for DRBD 8.
10012
10013     The algorithm for replace is quite complicated:
10014       - for all disks of the instance:
10015         - create new LVs on the new node with same names
10016         - shutdown the drbd device on the old secondary
10017         - disconnect the drbd network on the primary
10018         - create the drbd device on the new secondary
10019         - network attach the drbd on the primary, using an artifice:
10020           the drbd code for Attach() will connect to the network if it
10021           finds a device which is connected to the good local disks but
10022           not network enabled
10023       - wait for sync across all devices
10024       - remove all disks from the old secondary
10025
10026     Failures are not very well handled.
10027
10028     """
10029     steps_total = 6
10030
10031     pnode = self.instance.primary_node
10032
10033     # Step: check device activation
10034     self.lu.LogStep(1, steps_total, "Check device existence")
10035     self._CheckDisksExistence([self.instance.primary_node])
10036     self._CheckVolumeGroup([self.instance.primary_node])
10037
10038     # Step: check other node consistency
10039     self.lu.LogStep(2, steps_total, "Check peer consistency")
10040     self._CheckDisksConsistency(self.instance.primary_node, True, True)
10041
10042     # Step: create new storage
10043     self.lu.LogStep(3, steps_total, "Allocate new storage")
10044     for idx, dev in enumerate(self.instance.disks):
10045       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10046                       (self.new_node, idx))
10047       # we pass force_create=True to force LVM creation
10048       for new_lv in dev.children:
10049         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10050                         _GetInstanceInfoText(self.instance), False)
10051
10052     # Step 4: dbrd minors and drbd setups changes
10053     # after this, we must manually remove the drbd minors on both the
10054     # error and the success paths
10055     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10056     minors = self.cfg.AllocateDRBDMinor([self.new_node
10057                                          for dev in self.instance.disks],
10058                                         self.instance.name)
10059     logging.debug("Allocated minors %r", minors)
10060
10061     iv_names = {}
10062     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10063       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10064                       (self.new_node, idx))
10065       # create new devices on new_node; note that we create two IDs:
10066       # one without port, so the drbd will be activated without
10067       # networking information on the new node at this stage, and one
10068       # with network, for the latter activation in step 4
10069       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10070       if self.instance.primary_node == o_node1:
10071         p_minor = o_minor1
10072       else:
10073         assert self.instance.primary_node == o_node2, "Three-node instance?"
10074         p_minor = o_minor2
10075
10076       new_alone_id = (self.instance.primary_node, self.new_node, None,
10077                       p_minor, new_minor, o_secret)
10078       new_net_id = (self.instance.primary_node, self.new_node, o_port,
10079                     p_minor, new_minor, o_secret)
10080
10081       iv_names[idx] = (dev, dev.children, new_net_id)
10082       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10083                     new_net_id)
10084       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10085                               logical_id=new_alone_id,
10086                               children=dev.children,
10087                               size=dev.size)
10088       try:
10089         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10090                               _GetInstanceInfoText(self.instance), False)
10091       except errors.GenericError:
10092         self.cfg.ReleaseDRBDMinors(self.instance.name)
10093         raise
10094
10095     # We have new devices, shutdown the drbd on the old secondary
10096     for idx, dev in enumerate(self.instance.disks):
10097       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10098       self.cfg.SetDiskID(dev, self.target_node)
10099       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10100       if msg:
10101         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10102                            "node: %s" % (idx, msg),
10103                            hint=("Please cleanup this device manually as"
10104                                  " soon as possible"))
10105
10106     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10107     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10108                                                self.instance.disks)[pnode]
10109
10110     msg = result.fail_msg
10111     if msg:
10112       # detaches didn't succeed (unlikely)
10113       self.cfg.ReleaseDRBDMinors(self.instance.name)
10114       raise errors.OpExecError("Can't detach the disks from the network on"
10115                                " old node: %s" % (msg,))
10116
10117     # if we managed to detach at least one, we update all the disks of
10118     # the instance to point to the new secondary
10119     self.lu.LogInfo("Updating instance configuration")
10120     for dev, _, new_logical_id in iv_names.itervalues():
10121       dev.logical_id = new_logical_id
10122       self.cfg.SetDiskID(dev, self.instance.primary_node)
10123
10124     self.cfg.Update(self.instance, feedback_fn)
10125
10126     # and now perform the drbd attach
10127     self.lu.LogInfo("Attaching primary drbds to new secondary"
10128                     " (standalone => connected)")
10129     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10130                                             self.new_node],
10131                                            self.node_secondary_ip,
10132                                            self.instance.disks,
10133                                            self.instance.name,
10134                                            False)
10135     for to_node, to_result in result.items():
10136       msg = to_result.fail_msg
10137       if msg:
10138         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10139                            to_node, msg,
10140                            hint=("please do a gnt-instance info to see the"
10141                                  " status of disks"))
10142     cstep = 5
10143     if self.early_release:
10144       self.lu.LogStep(cstep, steps_total, "Removing old storage")
10145       cstep += 1
10146       self._RemoveOldStorage(self.target_node, iv_names)
10147       # WARNING: we release all node locks here, do not do other RPCs
10148       # than WaitForSync to the primary node
10149       _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10150                     names=[self.instance.primary_node,
10151                            self.target_node,
10152                            self.new_node])
10153
10154     # Wait for sync
10155     # This can fail as the old devices are degraded and _WaitForSync
10156     # does a combined result over all disks, so we don't check its return value
10157     self.lu.LogStep(cstep, steps_total, "Sync devices")
10158     cstep += 1
10159     _WaitForSync(self.lu, self.instance)
10160
10161     # Check all devices manually
10162     self._CheckDevices(self.instance.primary_node, iv_names)
10163
10164     # Step: remove old storage
10165     if not self.early_release:
10166       self.lu.LogStep(cstep, steps_total, "Removing old storage")
10167       self._RemoveOldStorage(self.target_node, iv_names)
10168
10169
10170 class LURepairNodeStorage(NoHooksLU):
10171   """Repairs the volume group on a node.
10172
10173   """
10174   REQ_BGL = False
10175
10176   def CheckArguments(self):
10177     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10178
10179     storage_type = self.op.storage_type
10180
10181     if (constants.SO_FIX_CONSISTENCY not in
10182         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10183       raise errors.OpPrereqError("Storage units of type '%s' can not be"
10184                                  " repaired" % storage_type,
10185                                  errors.ECODE_INVAL)
10186
10187   def ExpandNames(self):
10188     self.needed_locks = {
10189       locking.LEVEL_NODE: [self.op.node_name],
10190       }
10191
10192   def _CheckFaultyDisks(self, instance, node_name):
10193     """Ensure faulty disks abort the opcode or at least warn."""
10194     try:
10195       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10196                                   node_name, True):
10197         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10198                                    " node '%s'" % (instance.name, node_name),
10199                                    errors.ECODE_STATE)
10200     except errors.OpPrereqError, err:
10201       if self.op.ignore_consistency:
10202         self.proc.LogWarning(str(err.args[0]))
10203       else:
10204         raise
10205
10206   def CheckPrereq(self):
10207     """Check prerequisites.
10208
10209     """
10210     # Check whether any instance on this node has faulty disks
10211     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10212       if not inst.admin_up:
10213         continue
10214       check_nodes = set(inst.all_nodes)
10215       check_nodes.discard(self.op.node_name)
10216       for inst_node_name in check_nodes:
10217         self._CheckFaultyDisks(inst, inst_node_name)
10218
10219   def Exec(self, feedback_fn):
10220     feedback_fn("Repairing storage unit '%s' on %s ..." %
10221                 (self.op.name, self.op.node_name))
10222
10223     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10224     result = self.rpc.call_storage_execute(self.op.node_name,
10225                                            self.op.storage_type, st_args,
10226                                            self.op.name,
10227                                            constants.SO_FIX_CONSISTENCY)
10228     result.Raise("Failed to repair storage unit '%s' on %s" %
10229                  (self.op.name, self.op.node_name))
10230
10231
10232 class LUNodeEvacuate(NoHooksLU):
10233   """Evacuates instances off a list of nodes.
10234
10235   """
10236   REQ_BGL = False
10237
10238   def CheckArguments(self):
10239     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10240
10241   def ExpandNames(self):
10242     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10243
10244     if self.op.remote_node is not None:
10245       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10246       assert self.op.remote_node
10247
10248       if self.op.remote_node == self.op.node_name:
10249         raise errors.OpPrereqError("Can not use evacuated node as a new"
10250                                    " secondary node", errors.ECODE_INVAL)
10251
10252       if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10253         raise errors.OpPrereqError("Without the use of an iallocator only"
10254                                    " secondary instances can be evacuated",
10255                                    errors.ECODE_INVAL)
10256
10257     # Declare locks
10258     self.share_locks = _ShareAll()
10259     self.needed_locks = {
10260       locking.LEVEL_INSTANCE: [],
10261       locking.LEVEL_NODEGROUP: [],
10262       locking.LEVEL_NODE: [],
10263       }
10264
10265     if self.op.remote_node is None:
10266       # Iallocator will choose any node(s) in the same group
10267       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10268     else:
10269       group_nodes = frozenset([self.op.remote_node])
10270
10271     # Determine nodes to be locked
10272     self.lock_nodes = set([self.op.node_name]) | group_nodes
10273
10274   def _DetermineInstances(self):
10275     """Builds list of instances to operate on.
10276
10277     """
10278     assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10279
10280     if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10281       # Primary instances only
10282       inst_fn = _GetNodePrimaryInstances
10283       assert self.op.remote_node is None, \
10284         "Evacuating primary instances requires iallocator"
10285     elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10286       # Secondary instances only
10287       inst_fn = _GetNodeSecondaryInstances
10288     else:
10289       # All instances
10290       assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10291       inst_fn = _GetNodeInstances
10292
10293     return inst_fn(self.cfg, self.op.node_name)
10294
10295   def DeclareLocks(self, level):
10296     if level == locking.LEVEL_INSTANCE:
10297       # Lock instances optimistically, needs verification once node and group
10298       # locks have been acquired
10299       self.needed_locks[locking.LEVEL_INSTANCE] = \
10300         set(i.name for i in self._DetermineInstances())
10301
10302     elif level == locking.LEVEL_NODEGROUP:
10303       # Lock node groups optimistically, needs verification once nodes have
10304       # been acquired
10305       self.needed_locks[locking.LEVEL_NODEGROUP] = \
10306         self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10307
10308     elif level == locking.LEVEL_NODE:
10309       self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10310
10311   def CheckPrereq(self):
10312     # Verify locks
10313     owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10314     owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10315     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10316
10317     assert owned_nodes == self.lock_nodes
10318
10319     wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10320     if owned_groups != wanted_groups:
10321       raise errors.OpExecError("Node groups changed since locks were acquired,"
10322                                " current groups are '%s', used to be '%s'" %
10323                                (utils.CommaJoin(wanted_groups),
10324                                 utils.CommaJoin(owned_groups)))
10325
10326     # Determine affected instances
10327     self.instances = self._DetermineInstances()
10328     self.instance_names = [i.name for i in self.instances]
10329
10330     if set(self.instance_names) != owned_instances:
10331       raise errors.OpExecError("Instances on node '%s' changed since locks"
10332                                " were acquired, current instances are '%s',"
10333                                " used to be '%s'" %
10334                                (self.op.node_name,
10335                                 utils.CommaJoin(self.instance_names),
10336                                 utils.CommaJoin(owned_instances)))
10337
10338     if self.instance_names:
10339       self.LogInfo("Evacuating instances from node '%s': %s",
10340                    self.op.node_name,
10341                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
10342     else:
10343       self.LogInfo("No instances to evacuate from node '%s'",
10344                    self.op.node_name)
10345
10346     if self.op.remote_node is not None:
10347       for i in self.instances:
10348         if i.primary_node == self.op.remote_node:
10349           raise errors.OpPrereqError("Node %s is the primary node of"
10350                                      " instance %s, cannot use it as"
10351                                      " secondary" %
10352                                      (self.op.remote_node, i.name),
10353                                      errors.ECODE_INVAL)
10354
10355   def Exec(self, feedback_fn):
10356     assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10357
10358     if not self.instance_names:
10359       # No instances to evacuate
10360       jobs = []
10361
10362     elif self.op.iallocator is not None:
10363       # TODO: Implement relocation to other group
10364       ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10365                        evac_mode=self.op.mode,
10366                        instances=list(self.instance_names))
10367
10368       ial.Run(self.op.iallocator)
10369
10370       if not ial.success:
10371         raise errors.OpPrereqError("Can't compute node evacuation using"
10372                                    " iallocator '%s': %s" %
10373                                    (self.op.iallocator, ial.info),
10374                                    errors.ECODE_NORES)
10375
10376       jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10377
10378     elif self.op.remote_node is not None:
10379       assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10380       jobs = [
10381         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10382                                         remote_node=self.op.remote_node,
10383                                         disks=[],
10384                                         mode=constants.REPLACE_DISK_CHG,
10385                                         early_release=self.op.early_release)]
10386         for instance_name in self.instance_names
10387         ]
10388
10389     else:
10390       raise errors.ProgrammerError("No iallocator or remote node")
10391
10392     return ResultWithJobs(jobs)
10393
10394
10395 def _SetOpEarlyRelease(early_release, op):
10396   """Sets C{early_release} flag on opcodes if available.
10397
10398   """
10399   try:
10400     op.early_release = early_release
10401   except AttributeError:
10402     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10403
10404   return op
10405
10406
10407 def _NodeEvacDest(use_nodes, group, nodes):
10408   """Returns group or nodes depending on caller's choice.
10409
10410   """
10411   if use_nodes:
10412     return utils.CommaJoin(nodes)
10413   else:
10414     return group
10415
10416
10417 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10418   """Unpacks the result of change-group and node-evacuate iallocator requests.
10419
10420   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10421   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10422
10423   @type lu: L{LogicalUnit}
10424   @param lu: Logical unit instance
10425   @type alloc_result: tuple/list
10426   @param alloc_result: Result from iallocator
10427   @type early_release: bool
10428   @param early_release: Whether to release locks early if possible
10429   @type use_nodes: bool
10430   @param use_nodes: Whether to display node names instead of groups
10431
10432   """
10433   (moved, failed, jobs) = alloc_result
10434
10435   if failed:
10436     lu.LogWarning("Unable to evacuate instances %s",
10437                   utils.CommaJoin("%s (%s)" % (name, reason)
10438                                   for (name, reason) in failed))
10439
10440   if moved:
10441     lu.LogInfo("Instances to be moved: %s",
10442                utils.CommaJoin("%s (to %s)" %
10443                                (name, _NodeEvacDest(use_nodes, group, nodes))
10444                                for (name, group, nodes) in moved))
10445
10446   return [map(compat.partial(_SetOpEarlyRelease, early_release),
10447               map(opcodes.OpCode.LoadOpCode, ops))
10448           for ops in jobs]
10449
10450
10451 class LUInstanceGrowDisk(LogicalUnit):
10452   """Grow a disk of an instance.
10453
10454   """
10455   HPATH = "disk-grow"
10456   HTYPE = constants.HTYPE_INSTANCE
10457   REQ_BGL = False
10458
10459   def ExpandNames(self):
10460     self._ExpandAndLockInstance()
10461     self.needed_locks[locking.LEVEL_NODE] = []
10462     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10463
10464   def DeclareLocks(self, level):
10465     if level == locking.LEVEL_NODE:
10466       self._LockInstancesNodes()
10467
10468   def BuildHooksEnv(self):
10469     """Build hooks env.
10470
10471     This runs on the master, the primary and all the secondaries.
10472
10473     """
10474     env = {
10475       "DISK": self.op.disk,
10476       "AMOUNT": self.op.amount,
10477       }
10478     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10479     return env
10480
10481   def BuildHooksNodes(self):
10482     """Build hooks nodes.
10483
10484     """
10485     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10486     return (nl, nl)
10487
10488   def CheckPrereq(self):
10489     """Check prerequisites.
10490
10491     This checks that the instance is in the cluster.
10492
10493     """
10494     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10495     assert instance is not None, \
10496       "Cannot retrieve locked instance %s" % self.op.instance_name
10497     nodenames = list(instance.all_nodes)
10498     for node in nodenames:
10499       _CheckNodeOnline(self, node)
10500
10501     self.instance = instance
10502
10503     if instance.disk_template not in constants.DTS_GROWABLE:
10504       raise errors.OpPrereqError("Instance's disk layout does not support"
10505                                  " growing", errors.ECODE_INVAL)
10506
10507     self.disk = instance.FindDisk(self.op.disk)
10508
10509     if instance.disk_template not in (constants.DT_FILE,
10510                                       constants.DT_SHARED_FILE):
10511       # TODO: check the free disk space for file, when that feature will be
10512       # supported
10513       _CheckNodesFreeDiskPerVG(self, nodenames,
10514                                self.disk.ComputeGrowth(self.op.amount))
10515
10516   def Exec(self, feedback_fn):
10517     """Execute disk grow.
10518
10519     """
10520     instance = self.instance
10521     disk = self.disk
10522
10523     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10524     if not disks_ok:
10525       raise errors.OpExecError("Cannot activate block device to grow")
10526
10527     # First run all grow ops in dry-run mode
10528     for node in instance.all_nodes:
10529       self.cfg.SetDiskID(disk, node)
10530       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10531       result.Raise("Grow request failed to node %s" % node)
10532
10533     # We know that (as far as we can test) operations across different
10534     # nodes will succeed, time to run it for real
10535     for node in instance.all_nodes:
10536       self.cfg.SetDiskID(disk, node)
10537       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10538       result.Raise("Grow request failed to node %s" % node)
10539
10540       # TODO: Rewrite code to work properly
10541       # DRBD goes into sync mode for a short amount of time after executing the
10542       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10543       # calling "resize" in sync mode fails. Sleeping for a short amount of
10544       # time is a work-around.
10545       time.sleep(5)
10546
10547     disk.RecordGrow(self.op.amount)
10548     self.cfg.Update(instance, feedback_fn)
10549     if self.op.wait_for_sync:
10550       disk_abort = not _WaitForSync(self, instance, disks=[disk])
10551       if disk_abort:
10552         self.proc.LogWarning("Disk sync-ing has not returned a good"
10553                              " status; please check the instance")
10554       if not instance.admin_up:
10555         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10556     elif not instance.admin_up:
10557       self.proc.LogWarning("Not shutting down the disk even if the instance is"
10558                            " not supposed to be running because no wait for"
10559                            " sync mode was requested")
10560
10561
10562 class LUInstanceQueryData(NoHooksLU):
10563   """Query runtime instance data.
10564
10565   """
10566   REQ_BGL = False
10567
10568   def ExpandNames(self):
10569     self.needed_locks = {}
10570
10571     # Use locking if requested or when non-static information is wanted
10572     if not (self.op.static or self.op.use_locking):
10573       self.LogWarning("Non-static data requested, locks need to be acquired")
10574       self.op.use_locking = True
10575
10576     if self.op.instances or not self.op.use_locking:
10577       # Expand instance names right here
10578       self.wanted_names = _GetWantedInstances(self, self.op.instances)
10579     else:
10580       # Will use acquired locks
10581       self.wanted_names = None
10582
10583     if self.op.use_locking:
10584       self.share_locks = _ShareAll()
10585
10586       if self.wanted_names is None:
10587         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10588       else:
10589         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10590
10591       self.needed_locks[locking.LEVEL_NODE] = []
10592       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10593
10594   def DeclareLocks(self, level):
10595     if self.op.use_locking and level == locking.LEVEL_NODE:
10596       self._LockInstancesNodes()
10597
10598   def CheckPrereq(self):
10599     """Check prerequisites.
10600
10601     This only checks the optional instance list against the existing names.
10602
10603     """
10604     if self.wanted_names is None:
10605       assert self.op.use_locking, "Locking was not used"
10606       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10607
10608     self.wanted_instances = \
10609         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10610
10611   def _ComputeBlockdevStatus(self, node, instance_name, dev):
10612     """Returns the status of a block device
10613
10614     """
10615     if self.op.static or not node:
10616       return None
10617
10618     self.cfg.SetDiskID(dev, node)
10619
10620     result = self.rpc.call_blockdev_find(node, dev)
10621     if result.offline:
10622       return None
10623
10624     result.Raise("Can't compute disk status for %s" % instance_name)
10625
10626     status = result.payload
10627     if status is None:
10628       return None
10629
10630     return (status.dev_path, status.major, status.minor,
10631             status.sync_percent, status.estimated_time,
10632             status.is_degraded, status.ldisk_status)
10633
10634   def _ComputeDiskStatus(self, instance, snode, dev):
10635     """Compute block device status.
10636
10637     """
10638     if dev.dev_type in constants.LDS_DRBD:
10639       # we change the snode then (otherwise we use the one passed in)
10640       if dev.logical_id[0] == instance.primary_node:
10641         snode = dev.logical_id[1]
10642       else:
10643         snode = dev.logical_id[0]
10644
10645     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10646                                               instance.name, dev)
10647     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10648
10649     if dev.children:
10650       dev_children = map(compat.partial(self._ComputeDiskStatus,
10651                                         instance, snode),
10652                          dev.children)
10653     else:
10654       dev_children = []
10655
10656     return {
10657       "iv_name": dev.iv_name,
10658       "dev_type": dev.dev_type,
10659       "logical_id": dev.logical_id,
10660       "physical_id": dev.physical_id,
10661       "pstatus": dev_pstatus,
10662       "sstatus": dev_sstatus,
10663       "children": dev_children,
10664       "mode": dev.mode,
10665       "size": dev.size,
10666       }
10667
10668   def Exec(self, feedback_fn):
10669     """Gather and return data"""
10670     result = {}
10671
10672     cluster = self.cfg.GetClusterInfo()
10673
10674     pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10675                                           for i in self.wanted_instances)
10676     for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10677       if self.op.static or pnode.offline:
10678         remote_state = None
10679         if pnode.offline:
10680           self.LogWarning("Primary node %s is marked offline, returning static"
10681                           " information only for instance %s" %
10682                           (pnode.name, instance.name))
10683       else:
10684         remote_info = self.rpc.call_instance_info(instance.primary_node,
10685                                                   instance.name,
10686                                                   instance.hypervisor)
10687         remote_info.Raise("Error checking node %s" % instance.primary_node)
10688         remote_info = remote_info.payload
10689         if remote_info and "state" in remote_info:
10690           remote_state = "up"
10691         else:
10692           remote_state = "down"
10693
10694       if instance.admin_up:
10695         config_state = "up"
10696       else:
10697         config_state = "down"
10698
10699       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10700                   instance.disks)
10701
10702       result[instance.name] = {
10703         "name": instance.name,
10704         "config_state": config_state,
10705         "run_state": remote_state,
10706         "pnode": instance.primary_node,
10707         "snodes": instance.secondary_nodes,
10708         "os": instance.os,
10709         # this happens to be the same format used for hooks
10710         "nics": _NICListToTuple(self, instance.nics),
10711         "disk_template": instance.disk_template,
10712         "disks": disks,
10713         "hypervisor": instance.hypervisor,
10714         "network_port": instance.network_port,
10715         "hv_instance": instance.hvparams,
10716         "hv_actual": cluster.FillHV(instance, skip_globals=True),
10717         "be_instance": instance.beparams,
10718         "be_actual": cluster.FillBE(instance),
10719         "os_instance": instance.osparams,
10720         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10721         "serial_no": instance.serial_no,
10722         "mtime": instance.mtime,
10723         "ctime": instance.ctime,
10724         "uuid": instance.uuid,
10725         }
10726
10727     return result
10728
10729
10730 class LUInstanceSetParams(LogicalUnit):
10731   """Modifies an instances's parameters.
10732
10733   """
10734   HPATH = "instance-modify"
10735   HTYPE = constants.HTYPE_INSTANCE
10736   REQ_BGL = False
10737
10738   def CheckArguments(self):
10739     if not (self.op.nics or self.op.disks or self.op.disk_template or
10740             self.op.hvparams or self.op.beparams or self.op.os_name):
10741       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10742
10743     if self.op.hvparams:
10744       _CheckGlobalHvParams(self.op.hvparams)
10745
10746     # Disk validation
10747     disk_addremove = 0
10748     for disk_op, disk_dict in self.op.disks:
10749       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10750       if disk_op == constants.DDM_REMOVE:
10751         disk_addremove += 1
10752         continue
10753       elif disk_op == constants.DDM_ADD:
10754         disk_addremove += 1
10755       else:
10756         if not isinstance(disk_op, int):
10757           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10758         if not isinstance(disk_dict, dict):
10759           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10760           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10761
10762       if disk_op == constants.DDM_ADD:
10763         mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10764         if mode not in constants.DISK_ACCESS_SET:
10765           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10766                                      errors.ECODE_INVAL)
10767         size = disk_dict.get(constants.IDISK_SIZE, None)
10768         if size is None:
10769           raise errors.OpPrereqError("Required disk parameter size missing",
10770                                      errors.ECODE_INVAL)
10771         try:
10772           size = int(size)
10773         except (TypeError, ValueError), err:
10774           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10775                                      str(err), errors.ECODE_INVAL)
10776         disk_dict[constants.IDISK_SIZE] = size
10777       else:
10778         # modification of disk
10779         if constants.IDISK_SIZE in disk_dict:
10780           raise errors.OpPrereqError("Disk size change not possible, use"
10781                                      " grow-disk", errors.ECODE_INVAL)
10782
10783     if disk_addremove > 1:
10784       raise errors.OpPrereqError("Only one disk add or remove operation"
10785                                  " supported at a time", errors.ECODE_INVAL)
10786
10787     if self.op.disks and self.op.disk_template is not None:
10788       raise errors.OpPrereqError("Disk template conversion and other disk"
10789                                  " changes not supported at the same time",
10790                                  errors.ECODE_INVAL)
10791
10792     if (self.op.disk_template and
10793         self.op.disk_template in constants.DTS_INT_MIRROR and
10794         self.op.remote_node is None):
10795       raise errors.OpPrereqError("Changing the disk template to a mirrored"
10796                                  " one requires specifying a secondary node",
10797                                  errors.ECODE_INVAL)
10798
10799     # NIC validation
10800     nic_addremove = 0
10801     for nic_op, nic_dict in self.op.nics:
10802       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10803       if nic_op == constants.DDM_REMOVE:
10804         nic_addremove += 1
10805         continue
10806       elif nic_op == constants.DDM_ADD:
10807         nic_addremove += 1
10808       else:
10809         if not isinstance(nic_op, int):
10810           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10811         if not isinstance(nic_dict, dict):
10812           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10813           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10814
10815       # nic_dict should be a dict
10816       nic_ip = nic_dict.get(constants.INIC_IP, None)
10817       if nic_ip is not None:
10818         if nic_ip.lower() == constants.VALUE_NONE:
10819           nic_dict[constants.INIC_IP] = None
10820         else:
10821           if not netutils.IPAddress.IsValid(nic_ip):
10822             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10823                                        errors.ECODE_INVAL)
10824
10825       nic_bridge = nic_dict.get("bridge", None)
10826       nic_link = nic_dict.get(constants.INIC_LINK, None)
10827       if nic_bridge and nic_link:
10828         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10829                                    " at the same time", errors.ECODE_INVAL)
10830       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10831         nic_dict["bridge"] = None
10832       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10833         nic_dict[constants.INIC_LINK] = None
10834
10835       if nic_op == constants.DDM_ADD:
10836         nic_mac = nic_dict.get(constants.INIC_MAC, None)
10837         if nic_mac is None:
10838           nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10839
10840       if constants.INIC_MAC in nic_dict:
10841         nic_mac = nic_dict[constants.INIC_MAC]
10842         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10843           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10844
10845         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10846           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10847                                      " modifying an existing nic",
10848                                      errors.ECODE_INVAL)
10849
10850     if nic_addremove > 1:
10851       raise errors.OpPrereqError("Only one NIC add or remove operation"
10852                                  " supported at a time", errors.ECODE_INVAL)
10853
10854   def ExpandNames(self):
10855     self._ExpandAndLockInstance()
10856     self.needed_locks[locking.LEVEL_NODE] = []
10857     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10858
10859   def DeclareLocks(self, level):
10860     if level == locking.LEVEL_NODE:
10861       self._LockInstancesNodes()
10862       if self.op.disk_template and self.op.remote_node:
10863         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10864         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10865
10866   def BuildHooksEnv(self):
10867     """Build hooks env.
10868
10869     This runs on the master, primary and secondaries.
10870
10871     """
10872     args = dict()
10873     if constants.BE_MEMORY in self.be_new:
10874       args["memory"] = self.be_new[constants.BE_MEMORY]
10875     if constants.BE_VCPUS in self.be_new:
10876       args["vcpus"] = self.be_new[constants.BE_VCPUS]
10877     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10878     # information at all.
10879     if self.op.nics:
10880       args["nics"] = []
10881       nic_override = dict(self.op.nics)
10882       for idx, nic in enumerate(self.instance.nics):
10883         if idx in nic_override:
10884           this_nic_override = nic_override[idx]
10885         else:
10886           this_nic_override = {}
10887         if constants.INIC_IP in this_nic_override:
10888           ip = this_nic_override[constants.INIC_IP]
10889         else:
10890           ip = nic.ip
10891         if constants.INIC_MAC in this_nic_override:
10892           mac = this_nic_override[constants.INIC_MAC]
10893         else:
10894           mac = nic.mac
10895         if idx in self.nic_pnew:
10896           nicparams = self.nic_pnew[idx]
10897         else:
10898           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10899         mode = nicparams[constants.NIC_MODE]
10900         link = nicparams[constants.NIC_LINK]
10901         args["nics"].append((ip, mac, mode, link))
10902       if constants.DDM_ADD in nic_override:
10903         ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10904         mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10905         nicparams = self.nic_pnew[constants.DDM_ADD]
10906         mode = nicparams[constants.NIC_MODE]
10907         link = nicparams[constants.NIC_LINK]
10908         args["nics"].append((ip, mac, mode, link))
10909       elif constants.DDM_REMOVE in nic_override:
10910         del args["nics"][-1]
10911
10912     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10913     if self.op.disk_template:
10914       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10915
10916     return env
10917
10918   def BuildHooksNodes(self):
10919     """Build hooks nodes.
10920
10921     """
10922     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10923     return (nl, nl)
10924
10925   def CheckPrereq(self):
10926     """Check prerequisites.
10927
10928     This only checks the instance list against the existing names.
10929
10930     """
10931     # checking the new params on the primary/secondary nodes
10932
10933     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10934     cluster = self.cluster = self.cfg.GetClusterInfo()
10935     assert self.instance is not None, \
10936       "Cannot retrieve locked instance %s" % self.op.instance_name
10937     pnode = instance.primary_node
10938     nodelist = list(instance.all_nodes)
10939
10940     # OS change
10941     if self.op.os_name and not self.op.force:
10942       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10943                       self.op.force_variant)
10944       instance_os = self.op.os_name
10945     else:
10946       instance_os = instance.os
10947
10948     if self.op.disk_template:
10949       if instance.disk_template == self.op.disk_template:
10950         raise errors.OpPrereqError("Instance already has disk template %s" %
10951                                    instance.disk_template, errors.ECODE_INVAL)
10952
10953       if (instance.disk_template,
10954           self.op.disk_template) not in self._DISK_CONVERSIONS:
10955         raise errors.OpPrereqError("Unsupported disk template conversion from"
10956                                    " %s to %s" % (instance.disk_template,
10957                                                   self.op.disk_template),
10958                                    errors.ECODE_INVAL)
10959       _CheckInstanceDown(self, instance, "cannot change disk template")
10960       if self.op.disk_template in constants.DTS_INT_MIRROR:
10961         if self.op.remote_node == pnode:
10962           raise errors.OpPrereqError("Given new secondary node %s is the same"
10963                                      " as the primary node of the instance" %
10964                                      self.op.remote_node, errors.ECODE_STATE)
10965         _CheckNodeOnline(self, self.op.remote_node)
10966         _CheckNodeNotDrained(self, self.op.remote_node)
10967         # FIXME: here we assume that the old instance type is DT_PLAIN
10968         assert instance.disk_template == constants.DT_PLAIN
10969         disks = [{constants.IDISK_SIZE: d.size,
10970                   constants.IDISK_VG: d.logical_id[0]}
10971                  for d in instance.disks]
10972         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10973         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10974
10975     # hvparams processing
10976     if self.op.hvparams:
10977       hv_type = instance.hypervisor
10978       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10979       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10980       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10981
10982       # local check
10983       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10984       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10985       self.hv_proposed = self.hv_new = hv_new # the new actual values
10986       self.hv_inst = i_hvdict # the new dict (without defaults)
10987     else:
10988       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
10989                                               instance.hvparams)
10990       self.hv_new = self.hv_inst = {}
10991
10992     # beparams processing
10993     if self.op.beparams:
10994       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10995                                    use_none=True)
10996       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10997       be_new = cluster.SimpleFillBE(i_bedict)
10998       self.be_proposed = self.be_new = be_new # the new actual values
10999       self.be_inst = i_bedict # the new dict (without defaults)
11000     else:
11001       self.be_new = self.be_inst = {}
11002       self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11003     be_old = cluster.FillBE(instance)
11004
11005     # CPU param validation -- checking every time a paramtere is
11006     # changed to cover all cases where either CPU mask or vcpus have
11007     # changed
11008     if (constants.BE_VCPUS in self.be_proposed and
11009         constants.HV_CPU_MASK in self.hv_proposed):
11010       cpu_list = \
11011         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11012       # Verify mask is consistent with number of vCPUs. Can skip this
11013       # test if only 1 entry in the CPU mask, which means same mask
11014       # is applied to all vCPUs.
11015       if (len(cpu_list) > 1 and
11016           len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11017         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11018                                    " CPU mask [%s]" %
11019                                    (self.be_proposed[constants.BE_VCPUS],
11020                                     self.hv_proposed[constants.HV_CPU_MASK]),
11021                                    errors.ECODE_INVAL)
11022
11023       # Only perform this test if a new CPU mask is given
11024       if constants.HV_CPU_MASK in self.hv_new:
11025         # Calculate the largest CPU number requested
11026         max_requested_cpu = max(map(max, cpu_list))
11027         # Check that all of the instance's nodes have enough physical CPUs to
11028         # satisfy the requested CPU mask
11029         _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11030                                 max_requested_cpu + 1, instance.hypervisor)
11031
11032     # osparams processing
11033     if self.op.osparams:
11034       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11035       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11036       self.os_inst = i_osdict # the new dict (without defaults)
11037     else:
11038       self.os_inst = {}
11039
11040     self.warn = []
11041
11042     if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
11043         be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
11044       mem_check_list = [pnode]
11045       if be_new[constants.BE_AUTO_BALANCE]:
11046         # either we changed auto_balance to yes or it was from before
11047         mem_check_list.extend(instance.secondary_nodes)
11048       instance_info = self.rpc.call_instance_info(pnode, instance.name,
11049                                                   instance.hypervisor)
11050       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11051                                          instance.hypervisor)
11052       pninfo = nodeinfo[pnode]
11053       msg = pninfo.fail_msg
11054       if msg:
11055         # Assume the primary node is unreachable and go ahead
11056         self.warn.append("Can't get info from primary node %s: %s" %
11057                          (pnode, msg))
11058       elif not isinstance(pninfo.payload.get("memory_free", None), int):
11059         self.warn.append("Node data from primary node %s doesn't contain"
11060                          " free memory information" % pnode)
11061       elif instance_info.fail_msg:
11062         self.warn.append("Can't get instance runtime information: %s" %
11063                         instance_info.fail_msg)
11064       else:
11065         if instance_info.payload:
11066           current_mem = int(instance_info.payload["memory"])
11067         else:
11068           # Assume instance not running
11069           # (there is a slight race condition here, but it's not very probable,
11070           # and we have no other way to check)
11071           current_mem = 0
11072         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
11073                     pninfo.payload["memory_free"])
11074         if miss_mem > 0:
11075           raise errors.OpPrereqError("This change will prevent the instance"
11076                                      " from starting, due to %d MB of memory"
11077                                      " missing on its primary node" % miss_mem,
11078                                      errors.ECODE_NORES)
11079
11080       if be_new[constants.BE_AUTO_BALANCE]:
11081         for node, nres in nodeinfo.items():
11082           if node not in instance.secondary_nodes:
11083             continue
11084           nres.Raise("Can't get info from secondary node %s" % node,
11085                      prereq=True, ecode=errors.ECODE_STATE)
11086           if not isinstance(nres.payload.get("memory_free", None), int):
11087             raise errors.OpPrereqError("Secondary node %s didn't return free"
11088                                        " memory information" % node,
11089                                        errors.ECODE_STATE)
11090           elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11091             raise errors.OpPrereqError("This change will prevent the instance"
11092                                        " from failover to its secondary node"
11093                                        " %s, due to not enough memory" % node,
11094                                        errors.ECODE_STATE)
11095
11096     # NIC processing
11097     self.nic_pnew = {}
11098     self.nic_pinst = {}
11099     for nic_op, nic_dict in self.op.nics:
11100       if nic_op == constants.DDM_REMOVE:
11101         if not instance.nics:
11102           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11103                                      errors.ECODE_INVAL)
11104         continue
11105       if nic_op != constants.DDM_ADD:
11106         # an existing nic
11107         if not instance.nics:
11108           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11109                                      " no NICs" % nic_op,
11110                                      errors.ECODE_INVAL)
11111         if nic_op < 0 or nic_op >= len(instance.nics):
11112           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11113                                      " are 0 to %d" %
11114                                      (nic_op, len(instance.nics) - 1),
11115                                      errors.ECODE_INVAL)
11116         old_nic_params = instance.nics[nic_op].nicparams
11117         old_nic_ip = instance.nics[nic_op].ip
11118       else:
11119         old_nic_params = {}
11120         old_nic_ip = None
11121
11122       update_params_dict = dict([(key, nic_dict[key])
11123                                  for key in constants.NICS_PARAMETERS
11124                                  if key in nic_dict])
11125
11126       if "bridge" in nic_dict:
11127         update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11128
11129       new_nic_params = _GetUpdatedParams(old_nic_params,
11130                                          update_params_dict)
11131       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11132       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11133       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11134       self.nic_pinst[nic_op] = new_nic_params
11135       self.nic_pnew[nic_op] = new_filled_nic_params
11136       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11137
11138       if new_nic_mode == constants.NIC_MODE_BRIDGED:
11139         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11140         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11141         if msg:
11142           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11143           if self.op.force:
11144             self.warn.append(msg)
11145           else:
11146             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11147       if new_nic_mode == constants.NIC_MODE_ROUTED:
11148         if constants.INIC_IP in nic_dict:
11149           nic_ip = nic_dict[constants.INIC_IP]
11150         else:
11151           nic_ip = old_nic_ip
11152         if nic_ip is None:
11153           raise errors.OpPrereqError("Cannot set the nic ip to None"
11154                                      " on a routed nic", errors.ECODE_INVAL)
11155       if constants.INIC_MAC in nic_dict:
11156         nic_mac = nic_dict[constants.INIC_MAC]
11157         if nic_mac is None:
11158           raise errors.OpPrereqError("Cannot set the nic mac to None",
11159                                      errors.ECODE_INVAL)
11160         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11161           # otherwise generate the mac
11162           nic_dict[constants.INIC_MAC] = \
11163             self.cfg.GenerateMAC(self.proc.GetECId())
11164         else:
11165           # or validate/reserve the current one
11166           try:
11167             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11168           except errors.ReservationError:
11169             raise errors.OpPrereqError("MAC address %s already in use"
11170                                        " in cluster" % nic_mac,
11171                                        errors.ECODE_NOTUNIQUE)
11172
11173     # DISK processing
11174     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11175       raise errors.OpPrereqError("Disk operations not supported for"
11176                                  " diskless instances",
11177                                  errors.ECODE_INVAL)
11178     for disk_op, _ in self.op.disks:
11179       if disk_op == constants.DDM_REMOVE:
11180         if len(instance.disks) == 1:
11181           raise errors.OpPrereqError("Cannot remove the last disk of"
11182                                      " an instance", errors.ECODE_INVAL)
11183         _CheckInstanceDown(self, instance, "cannot remove disks")
11184
11185       if (disk_op == constants.DDM_ADD and
11186           len(instance.disks) >= constants.MAX_DISKS):
11187         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11188                                    " add more" % constants.MAX_DISKS,
11189                                    errors.ECODE_STATE)
11190       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11191         # an existing disk
11192         if disk_op < 0 or disk_op >= len(instance.disks):
11193           raise errors.OpPrereqError("Invalid disk index %s, valid values"
11194                                      " are 0 to %d" %
11195                                      (disk_op, len(instance.disks)),
11196                                      errors.ECODE_INVAL)
11197
11198     return
11199
11200   def _ConvertPlainToDrbd(self, feedback_fn):
11201     """Converts an instance from plain to drbd.
11202
11203     """
11204     feedback_fn("Converting template to drbd")
11205     instance = self.instance
11206     pnode = instance.primary_node
11207     snode = self.op.remote_node
11208
11209     # create a fake disk info for _GenerateDiskTemplate
11210     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11211                   constants.IDISK_VG: d.logical_id[0]}
11212                  for d in instance.disks]
11213     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11214                                       instance.name, pnode, [snode],
11215                                       disk_info, None, None, 0, feedback_fn)
11216     info = _GetInstanceInfoText(instance)
11217     feedback_fn("Creating aditional volumes...")
11218     # first, create the missing data and meta devices
11219     for disk in new_disks:
11220       # unfortunately this is... not too nice
11221       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11222                             info, True)
11223       for child in disk.children:
11224         _CreateSingleBlockDev(self, snode, instance, child, info, True)
11225     # at this stage, all new LVs have been created, we can rename the
11226     # old ones
11227     feedback_fn("Renaming original volumes...")
11228     rename_list = [(o, n.children[0].logical_id)
11229                    for (o, n) in zip(instance.disks, new_disks)]
11230     result = self.rpc.call_blockdev_rename(pnode, rename_list)
11231     result.Raise("Failed to rename original LVs")
11232
11233     feedback_fn("Initializing DRBD devices...")
11234     # all child devices are in place, we can now create the DRBD devices
11235     for disk in new_disks:
11236       for node in [pnode, snode]:
11237         f_create = node == pnode
11238         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11239
11240     # at this point, the instance has been modified
11241     instance.disk_template = constants.DT_DRBD8
11242     instance.disks = new_disks
11243     self.cfg.Update(instance, feedback_fn)
11244
11245     # disks are created, waiting for sync
11246     disk_abort = not _WaitForSync(self, instance,
11247                                   oneshot=not self.op.wait_for_sync)
11248     if disk_abort:
11249       raise errors.OpExecError("There are some degraded disks for"
11250                                " this instance, please cleanup manually")
11251
11252   def _ConvertDrbdToPlain(self, feedback_fn):
11253     """Converts an instance from drbd to plain.
11254
11255     """
11256     instance = self.instance
11257     assert len(instance.secondary_nodes) == 1
11258     pnode = instance.primary_node
11259     snode = instance.secondary_nodes[0]
11260     feedback_fn("Converting template to plain")
11261
11262     old_disks = instance.disks
11263     new_disks = [d.children[0] for d in old_disks]
11264
11265     # copy over size and mode
11266     for parent, child in zip(old_disks, new_disks):
11267       child.size = parent.size
11268       child.mode = parent.mode
11269
11270     # update instance structure
11271     instance.disks = new_disks
11272     instance.disk_template = constants.DT_PLAIN
11273     self.cfg.Update(instance, feedback_fn)
11274
11275     feedback_fn("Removing volumes on the secondary node...")
11276     for disk in old_disks:
11277       self.cfg.SetDiskID(disk, snode)
11278       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11279       if msg:
11280         self.LogWarning("Could not remove block device %s on node %s,"
11281                         " continuing anyway: %s", disk.iv_name, snode, msg)
11282
11283     feedback_fn("Removing unneeded volumes on the primary node...")
11284     for idx, disk in enumerate(old_disks):
11285       meta = disk.children[1]
11286       self.cfg.SetDiskID(meta, pnode)
11287       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11288       if msg:
11289         self.LogWarning("Could not remove metadata for disk %d on node %s,"
11290                         " continuing anyway: %s", idx, pnode, msg)
11291
11292   def Exec(self, feedback_fn):
11293     """Modifies an instance.
11294
11295     All parameters take effect only at the next restart of the instance.
11296
11297     """
11298     # Process here the warnings from CheckPrereq, as we don't have a
11299     # feedback_fn there.
11300     for warn in self.warn:
11301       feedback_fn("WARNING: %s" % warn)
11302
11303     result = []
11304     instance = self.instance
11305     # disk changes
11306     for disk_op, disk_dict in self.op.disks:
11307       if disk_op == constants.DDM_REMOVE:
11308         # remove the last disk
11309         device = instance.disks.pop()
11310         device_idx = len(instance.disks)
11311         for node, disk in device.ComputeNodeTree(instance.primary_node):
11312           self.cfg.SetDiskID(disk, node)
11313           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11314           if msg:
11315             self.LogWarning("Could not remove disk/%d on node %s: %s,"
11316                             " continuing anyway", device_idx, node, msg)
11317         result.append(("disk/%d" % device_idx, "remove"))
11318       elif disk_op == constants.DDM_ADD:
11319         # add a new disk
11320         if instance.disk_template in (constants.DT_FILE,
11321                                         constants.DT_SHARED_FILE):
11322           file_driver, file_path = instance.disks[0].logical_id
11323           file_path = os.path.dirname(file_path)
11324         else:
11325           file_driver = file_path = None
11326         disk_idx_base = len(instance.disks)
11327         new_disk = _GenerateDiskTemplate(self,
11328                                          instance.disk_template,
11329                                          instance.name, instance.primary_node,
11330                                          instance.secondary_nodes,
11331                                          [disk_dict],
11332                                          file_path,
11333                                          file_driver,
11334                                          disk_idx_base, feedback_fn)[0]
11335         instance.disks.append(new_disk)
11336         info = _GetInstanceInfoText(instance)
11337
11338         logging.info("Creating volume %s for instance %s",
11339                      new_disk.iv_name, instance.name)
11340         # Note: this needs to be kept in sync with _CreateDisks
11341         #HARDCODE
11342         for node in instance.all_nodes:
11343           f_create = node == instance.primary_node
11344           try:
11345             _CreateBlockDev(self, node, instance, new_disk,
11346                             f_create, info, f_create)
11347           except errors.OpExecError, err:
11348             self.LogWarning("Failed to create volume %s (%s) on"
11349                             " node %s: %s",
11350                             new_disk.iv_name, new_disk, node, err)
11351         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11352                        (new_disk.size, new_disk.mode)))
11353       else:
11354         # change a given disk
11355         instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11356         result.append(("disk.mode/%d" % disk_op,
11357                        disk_dict[constants.IDISK_MODE]))
11358
11359     if self.op.disk_template:
11360       r_shut = _ShutdownInstanceDisks(self, instance)
11361       if not r_shut:
11362         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11363                                  " proceed with disk template conversion")
11364       mode = (instance.disk_template, self.op.disk_template)
11365       try:
11366         self._DISK_CONVERSIONS[mode](self, feedback_fn)
11367       except:
11368         self.cfg.ReleaseDRBDMinors(instance.name)
11369         raise
11370       result.append(("disk_template", self.op.disk_template))
11371
11372     # NIC changes
11373     for nic_op, nic_dict in self.op.nics:
11374       if nic_op == constants.DDM_REMOVE:
11375         # remove the last nic
11376         del instance.nics[-1]
11377         result.append(("nic.%d" % len(instance.nics), "remove"))
11378       elif nic_op == constants.DDM_ADD:
11379         # mac and bridge should be set, by now
11380         mac = nic_dict[constants.INIC_MAC]
11381         ip = nic_dict.get(constants.INIC_IP, None)
11382         nicparams = self.nic_pinst[constants.DDM_ADD]
11383         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11384         instance.nics.append(new_nic)
11385         result.append(("nic.%d" % (len(instance.nics) - 1),
11386                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
11387                        (new_nic.mac, new_nic.ip,
11388                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11389                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11390                        )))
11391       else:
11392         for key in (constants.INIC_MAC, constants.INIC_IP):
11393           if key in nic_dict:
11394             setattr(instance.nics[nic_op], key, nic_dict[key])
11395         if nic_op in self.nic_pinst:
11396           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11397         for key, val in nic_dict.iteritems():
11398           result.append(("nic.%s/%d" % (key, nic_op), val))
11399
11400     # hvparams changes
11401     if self.op.hvparams:
11402       instance.hvparams = self.hv_inst
11403       for key, val in self.op.hvparams.iteritems():
11404         result.append(("hv/%s" % key, val))
11405
11406     # beparams changes
11407     if self.op.beparams:
11408       instance.beparams = self.be_inst
11409       for key, val in self.op.beparams.iteritems():
11410         result.append(("be/%s" % key, val))
11411
11412     # OS change
11413     if self.op.os_name:
11414       instance.os = self.op.os_name
11415
11416     # osparams changes
11417     if self.op.osparams:
11418       instance.osparams = self.os_inst
11419       for key, val in self.op.osparams.iteritems():
11420         result.append(("os/%s" % key, val))
11421
11422     self.cfg.Update(instance, feedback_fn)
11423
11424     return result
11425
11426   _DISK_CONVERSIONS = {
11427     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11428     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11429     }
11430
11431
11432 class LUInstanceChangeGroup(LogicalUnit):
11433   HPATH = "instance-change-group"
11434   HTYPE = constants.HTYPE_INSTANCE
11435   REQ_BGL = False
11436
11437   def ExpandNames(self):
11438     self.share_locks = _ShareAll()
11439     self.needed_locks = {
11440       locking.LEVEL_NODEGROUP: [],
11441       locking.LEVEL_NODE: [],
11442       }
11443
11444     self._ExpandAndLockInstance()
11445
11446     if self.op.target_groups:
11447       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11448                                   self.op.target_groups)
11449     else:
11450       self.req_target_uuids = None
11451
11452     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11453
11454   def DeclareLocks(self, level):
11455     if level == locking.LEVEL_NODEGROUP:
11456       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11457
11458       if self.req_target_uuids:
11459         lock_groups = set(self.req_target_uuids)
11460
11461         # Lock all groups used by instance optimistically; this requires going
11462         # via the node before it's locked, requiring verification later on
11463         instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11464         lock_groups.update(instance_groups)
11465       else:
11466         # No target groups, need to lock all of them
11467         lock_groups = locking.ALL_SET
11468
11469       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11470
11471     elif level == locking.LEVEL_NODE:
11472       if self.req_target_uuids:
11473         # Lock all nodes used by instances
11474         self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11475         self._LockInstancesNodes()
11476
11477         # Lock all nodes in all potential target groups
11478         lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11479                        self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11480         member_nodes = [node_name
11481                         for group in lock_groups
11482                         for node_name in self.cfg.GetNodeGroup(group).members]
11483         self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11484       else:
11485         # Lock all nodes as all groups are potential targets
11486         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11487
11488   def CheckPrereq(self):
11489     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11490     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11491     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11492
11493     assert (self.req_target_uuids is None or
11494             owned_groups.issuperset(self.req_target_uuids))
11495     assert owned_instances == set([self.op.instance_name])
11496
11497     # Get instance information
11498     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11499
11500     # Check if node groups for locked instance are still correct
11501     assert owned_nodes.issuperset(self.instance.all_nodes), \
11502       ("Instance %s's nodes changed while we kept the lock" %
11503        self.op.instance_name)
11504
11505     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11506                                            owned_groups)
11507
11508     if self.req_target_uuids:
11509       # User requested specific target groups
11510       self.target_uuids = self.req_target_uuids
11511     else:
11512       # All groups except those used by the instance are potential targets
11513       self.target_uuids = owned_groups - inst_groups
11514
11515     conflicting_groups = self.target_uuids & inst_groups
11516     if conflicting_groups:
11517       raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11518                                  " used by the instance '%s'" %
11519                                  (utils.CommaJoin(conflicting_groups),
11520                                   self.op.instance_name),
11521                                  errors.ECODE_INVAL)
11522
11523     if not self.target_uuids:
11524       raise errors.OpPrereqError("There are no possible target groups",
11525                                  errors.ECODE_INVAL)
11526
11527   def BuildHooksEnv(self):
11528     """Build hooks env.
11529
11530     """
11531     assert self.target_uuids
11532
11533     env = {
11534       "TARGET_GROUPS": " ".join(self.target_uuids),
11535       }
11536
11537     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11538
11539     return env
11540
11541   def BuildHooksNodes(self):
11542     """Build hooks nodes.
11543
11544     """
11545     mn = self.cfg.GetMasterNode()
11546     return ([mn], [mn])
11547
11548   def Exec(self, feedback_fn):
11549     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11550
11551     assert instances == [self.op.instance_name], "Instance not locked"
11552
11553     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11554                      instances=instances, target_groups=list(self.target_uuids))
11555
11556     ial.Run(self.op.iallocator)
11557
11558     if not ial.success:
11559       raise errors.OpPrereqError("Can't compute solution for changing group of"
11560                                  " instance '%s' using iallocator '%s': %s" %
11561                                  (self.op.instance_name, self.op.iallocator,
11562                                   ial.info),
11563                                  errors.ECODE_NORES)
11564
11565     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11566
11567     self.LogInfo("Iallocator returned %s job(s) for changing group of"
11568                  " instance '%s'", len(jobs), self.op.instance_name)
11569
11570     return ResultWithJobs(jobs)
11571
11572
11573 class LUBackupQuery(NoHooksLU):
11574   """Query the exports list
11575
11576   """
11577   REQ_BGL = False
11578
11579   def ExpandNames(self):
11580     self.needed_locks = {}
11581     self.share_locks[locking.LEVEL_NODE] = 1
11582     if not self.op.nodes:
11583       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11584     else:
11585       self.needed_locks[locking.LEVEL_NODE] = \
11586         _GetWantedNodes(self, self.op.nodes)
11587
11588   def Exec(self, feedback_fn):
11589     """Compute the list of all the exported system images.
11590
11591     @rtype: dict
11592     @return: a dictionary with the structure node->(export-list)
11593         where export-list is a list of the instances exported on
11594         that node.
11595
11596     """
11597     self.nodes = self.owned_locks(locking.LEVEL_NODE)
11598     rpcresult = self.rpc.call_export_list(self.nodes)
11599     result = {}
11600     for node in rpcresult:
11601       if rpcresult[node].fail_msg:
11602         result[node] = False
11603       else:
11604         result[node] = rpcresult[node].payload
11605
11606     return result
11607
11608
11609 class LUBackupPrepare(NoHooksLU):
11610   """Prepares an instance for an export and returns useful information.
11611
11612   """
11613   REQ_BGL = False
11614
11615   def ExpandNames(self):
11616     self._ExpandAndLockInstance()
11617
11618   def CheckPrereq(self):
11619     """Check prerequisites.
11620
11621     """
11622     instance_name = self.op.instance_name
11623
11624     self.instance = self.cfg.GetInstanceInfo(instance_name)
11625     assert self.instance is not None, \
11626           "Cannot retrieve locked instance %s" % self.op.instance_name
11627     _CheckNodeOnline(self, self.instance.primary_node)
11628
11629     self._cds = _GetClusterDomainSecret()
11630
11631   def Exec(self, feedback_fn):
11632     """Prepares an instance for an export.
11633
11634     """
11635     instance = self.instance
11636
11637     if self.op.mode == constants.EXPORT_MODE_REMOTE:
11638       salt = utils.GenerateSecret(8)
11639
11640       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11641       result = self.rpc.call_x509_cert_create(instance.primary_node,
11642                                               constants.RIE_CERT_VALIDITY)
11643       result.Raise("Can't create X509 key and certificate on %s" % result.node)
11644
11645       (name, cert_pem) = result.payload
11646
11647       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11648                                              cert_pem)
11649
11650       return {
11651         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11652         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11653                           salt),
11654         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11655         }
11656
11657     return None
11658
11659
11660 class LUBackupExport(LogicalUnit):
11661   """Export an instance to an image in the cluster.
11662
11663   """
11664   HPATH = "instance-export"
11665   HTYPE = constants.HTYPE_INSTANCE
11666   REQ_BGL = False
11667
11668   def CheckArguments(self):
11669     """Check the arguments.
11670
11671     """
11672     self.x509_key_name = self.op.x509_key_name
11673     self.dest_x509_ca_pem = self.op.destination_x509_ca
11674
11675     if self.op.mode == constants.EXPORT_MODE_REMOTE:
11676       if not self.x509_key_name:
11677         raise errors.OpPrereqError("Missing X509 key name for encryption",
11678                                    errors.ECODE_INVAL)
11679
11680       if not self.dest_x509_ca_pem:
11681         raise errors.OpPrereqError("Missing destination X509 CA",
11682                                    errors.ECODE_INVAL)
11683
11684   def ExpandNames(self):
11685     self._ExpandAndLockInstance()
11686
11687     # Lock all nodes for local exports
11688     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11689       # FIXME: lock only instance primary and destination node
11690       #
11691       # Sad but true, for now we have do lock all nodes, as we don't know where
11692       # the previous export might be, and in this LU we search for it and
11693       # remove it from its current node. In the future we could fix this by:
11694       #  - making a tasklet to search (share-lock all), then create the
11695       #    new one, then one to remove, after
11696       #  - removing the removal operation altogether
11697       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11698
11699   def DeclareLocks(self, level):
11700     """Last minute lock declaration."""
11701     # All nodes are locked anyway, so nothing to do here.
11702
11703   def BuildHooksEnv(self):
11704     """Build hooks env.
11705
11706     This will run on the master, primary node and target node.
11707
11708     """
11709     env = {
11710       "EXPORT_MODE": self.op.mode,
11711       "EXPORT_NODE": self.op.target_node,
11712       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11713       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11714       # TODO: Generic function for boolean env variables
11715       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11716       }
11717
11718     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11719
11720     return env
11721
11722   def BuildHooksNodes(self):
11723     """Build hooks nodes.
11724
11725     """
11726     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11727
11728     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11729       nl.append(self.op.target_node)
11730
11731     return (nl, nl)
11732
11733   def CheckPrereq(self):
11734     """Check prerequisites.
11735
11736     This checks that the instance and node names are valid.
11737
11738     """
11739     instance_name = self.op.instance_name
11740
11741     self.instance = self.cfg.GetInstanceInfo(instance_name)
11742     assert self.instance is not None, \
11743           "Cannot retrieve locked instance %s" % self.op.instance_name
11744     _CheckNodeOnline(self, self.instance.primary_node)
11745
11746     if (self.op.remove_instance and self.instance.admin_up and
11747         not self.op.shutdown):
11748       raise errors.OpPrereqError("Can not remove instance without shutting it"
11749                                  " down before")
11750
11751     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11752       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11753       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11754       assert self.dst_node is not None
11755
11756       _CheckNodeOnline(self, self.dst_node.name)
11757       _CheckNodeNotDrained(self, self.dst_node.name)
11758
11759       self._cds = None
11760       self.dest_disk_info = None
11761       self.dest_x509_ca = None
11762
11763     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11764       self.dst_node = None
11765
11766       if len(self.op.target_node) != len(self.instance.disks):
11767         raise errors.OpPrereqError(("Received destination information for %s"
11768                                     " disks, but instance %s has %s disks") %
11769                                    (len(self.op.target_node), instance_name,
11770                                     len(self.instance.disks)),
11771                                    errors.ECODE_INVAL)
11772
11773       cds = _GetClusterDomainSecret()
11774
11775       # Check X509 key name
11776       try:
11777         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11778       except (TypeError, ValueError), err:
11779         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11780
11781       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11782         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11783                                    errors.ECODE_INVAL)
11784
11785       # Load and verify CA
11786       try:
11787         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11788       except OpenSSL.crypto.Error, err:
11789         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11790                                    (err, ), errors.ECODE_INVAL)
11791
11792       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11793       if errcode is not None:
11794         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11795                                    (msg, ), errors.ECODE_INVAL)
11796
11797       self.dest_x509_ca = cert
11798
11799       # Verify target information
11800       disk_info = []
11801       for idx, disk_data in enumerate(self.op.target_node):
11802         try:
11803           (host, port, magic) = \
11804             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11805         except errors.GenericError, err:
11806           raise errors.OpPrereqError("Target info for disk %s: %s" %
11807                                      (idx, err), errors.ECODE_INVAL)
11808
11809         disk_info.append((host, port, magic))
11810
11811       assert len(disk_info) == len(self.op.target_node)
11812       self.dest_disk_info = disk_info
11813
11814     else:
11815       raise errors.ProgrammerError("Unhandled export mode %r" %
11816                                    self.op.mode)
11817
11818     # instance disk type verification
11819     # TODO: Implement export support for file-based disks
11820     for disk in self.instance.disks:
11821       if disk.dev_type == constants.LD_FILE:
11822         raise errors.OpPrereqError("Export not supported for instances with"
11823                                    " file-based disks", errors.ECODE_INVAL)
11824
11825   def _CleanupExports(self, feedback_fn):
11826     """Removes exports of current instance from all other nodes.
11827
11828     If an instance in a cluster with nodes A..D was exported to node C, its
11829     exports will be removed from the nodes A, B and D.
11830
11831     """
11832     assert self.op.mode != constants.EXPORT_MODE_REMOTE
11833
11834     nodelist = self.cfg.GetNodeList()
11835     nodelist.remove(self.dst_node.name)
11836
11837     # on one-node clusters nodelist will be empty after the removal
11838     # if we proceed the backup would be removed because OpBackupQuery
11839     # substitutes an empty list with the full cluster node list.
11840     iname = self.instance.name
11841     if nodelist:
11842       feedback_fn("Removing old exports for instance %s" % iname)
11843       exportlist = self.rpc.call_export_list(nodelist)
11844       for node in exportlist:
11845         if exportlist[node].fail_msg:
11846           continue
11847         if iname in exportlist[node].payload:
11848           msg = self.rpc.call_export_remove(node, iname).fail_msg
11849           if msg:
11850             self.LogWarning("Could not remove older export for instance %s"
11851                             " on node %s: %s", iname, node, msg)
11852
11853   def Exec(self, feedback_fn):
11854     """Export an instance to an image in the cluster.
11855
11856     """
11857     assert self.op.mode in constants.EXPORT_MODES
11858
11859     instance = self.instance
11860     src_node = instance.primary_node
11861
11862     if self.op.shutdown:
11863       # shutdown the instance, but not the disks
11864       feedback_fn("Shutting down instance %s" % instance.name)
11865       result = self.rpc.call_instance_shutdown(src_node, instance,
11866                                                self.op.shutdown_timeout)
11867       # TODO: Maybe ignore failures if ignore_remove_failures is set
11868       result.Raise("Could not shutdown instance %s on"
11869                    " node %s" % (instance.name, src_node))
11870
11871     # set the disks ID correctly since call_instance_start needs the
11872     # correct drbd minor to create the symlinks
11873     for disk in instance.disks:
11874       self.cfg.SetDiskID(disk, src_node)
11875
11876     activate_disks = (not instance.admin_up)
11877
11878     if activate_disks:
11879       # Activate the instance disks if we'exporting a stopped instance
11880       feedback_fn("Activating disks for %s" % instance.name)
11881       _StartInstanceDisks(self, instance, None)
11882
11883     try:
11884       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11885                                                      instance)
11886
11887       helper.CreateSnapshots()
11888       try:
11889         if (self.op.shutdown and instance.admin_up and
11890             not self.op.remove_instance):
11891           assert not activate_disks
11892           feedback_fn("Starting instance %s" % instance.name)
11893           result = self.rpc.call_instance_start(src_node, instance,
11894                                                 None, None, False)
11895           msg = result.fail_msg
11896           if msg:
11897             feedback_fn("Failed to start instance: %s" % msg)
11898             _ShutdownInstanceDisks(self, instance)
11899             raise errors.OpExecError("Could not start instance: %s" % msg)
11900
11901         if self.op.mode == constants.EXPORT_MODE_LOCAL:
11902           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11903         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11904           connect_timeout = constants.RIE_CONNECT_TIMEOUT
11905           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11906
11907           (key_name, _, _) = self.x509_key_name
11908
11909           dest_ca_pem = \
11910             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11911                                             self.dest_x509_ca)
11912
11913           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11914                                                      key_name, dest_ca_pem,
11915                                                      timeouts)
11916       finally:
11917         helper.Cleanup()
11918
11919       # Check for backwards compatibility
11920       assert len(dresults) == len(instance.disks)
11921       assert compat.all(isinstance(i, bool) for i in dresults), \
11922              "Not all results are boolean: %r" % dresults
11923
11924     finally:
11925       if activate_disks:
11926         feedback_fn("Deactivating disks for %s" % instance.name)
11927         _ShutdownInstanceDisks(self, instance)
11928
11929     if not (compat.all(dresults) and fin_resu):
11930       failures = []
11931       if not fin_resu:
11932         failures.append("export finalization")
11933       if not compat.all(dresults):
11934         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11935                                if not dsk)
11936         failures.append("disk export: disk(s) %s" % fdsk)
11937
11938       raise errors.OpExecError("Export failed, errors in %s" %
11939                                utils.CommaJoin(failures))
11940
11941     # At this point, the export was successful, we can cleanup/finish
11942
11943     # Remove instance if requested
11944     if self.op.remove_instance:
11945       feedback_fn("Removing instance %s" % instance.name)
11946       _RemoveInstance(self, feedback_fn, instance,
11947                       self.op.ignore_remove_failures)
11948
11949     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11950       self._CleanupExports(feedback_fn)
11951
11952     return fin_resu, dresults
11953
11954
11955 class LUBackupRemove(NoHooksLU):
11956   """Remove exports related to the named instance.
11957
11958   """
11959   REQ_BGL = False
11960
11961   def ExpandNames(self):
11962     self.needed_locks = {}
11963     # We need all nodes to be locked in order for RemoveExport to work, but we
11964     # don't need to lock the instance itself, as nothing will happen to it (and
11965     # we can remove exports also for a removed instance)
11966     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11967
11968   def Exec(self, feedback_fn):
11969     """Remove any export.
11970
11971     """
11972     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11973     # If the instance was not found we'll try with the name that was passed in.
11974     # This will only work if it was an FQDN, though.
11975     fqdn_warn = False
11976     if not instance_name:
11977       fqdn_warn = True
11978       instance_name = self.op.instance_name
11979
11980     locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11981     exportlist = self.rpc.call_export_list(locked_nodes)
11982     found = False
11983     for node in exportlist:
11984       msg = exportlist[node].fail_msg
11985       if msg:
11986         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11987         continue
11988       if instance_name in exportlist[node].payload:
11989         found = True
11990         result = self.rpc.call_export_remove(node, instance_name)
11991         msg = result.fail_msg
11992         if msg:
11993           logging.error("Could not remove export for instance %s"
11994                         " on node %s: %s", instance_name, node, msg)
11995
11996     if fqdn_warn and not found:
11997       feedback_fn("Export not found. If trying to remove an export belonging"
11998                   " to a deleted instance please use its Fully Qualified"
11999                   " Domain Name.")
12000
12001
12002 class LUGroupAdd(LogicalUnit):
12003   """Logical unit for creating node groups.
12004
12005   """
12006   HPATH = "group-add"
12007   HTYPE = constants.HTYPE_GROUP
12008   REQ_BGL = False
12009
12010   def ExpandNames(self):
12011     # We need the new group's UUID here so that we can create and acquire the
12012     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12013     # that it should not check whether the UUID exists in the configuration.
12014     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12015     self.needed_locks = {}
12016     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12017
12018   def CheckPrereq(self):
12019     """Check prerequisites.
12020
12021     This checks that the given group name is not an existing node group
12022     already.
12023
12024     """
12025     try:
12026       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12027     except errors.OpPrereqError:
12028       pass
12029     else:
12030       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12031                                  " node group (UUID: %s)" %
12032                                  (self.op.group_name, existing_uuid),
12033                                  errors.ECODE_EXISTS)
12034
12035     if self.op.ndparams:
12036       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12037
12038   def BuildHooksEnv(self):
12039     """Build hooks env.
12040
12041     """
12042     return {
12043       "GROUP_NAME": self.op.group_name,
12044       }
12045
12046   def BuildHooksNodes(self):
12047     """Build hooks nodes.
12048
12049     """
12050     mn = self.cfg.GetMasterNode()
12051     return ([mn], [mn])
12052
12053   def Exec(self, feedback_fn):
12054     """Add the node group to the cluster.
12055
12056     """
12057     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12058                                   uuid=self.group_uuid,
12059                                   alloc_policy=self.op.alloc_policy,
12060                                   ndparams=self.op.ndparams)
12061
12062     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12063     del self.remove_locks[locking.LEVEL_NODEGROUP]
12064
12065
12066 class LUGroupAssignNodes(NoHooksLU):
12067   """Logical unit for assigning nodes to groups.
12068
12069   """
12070   REQ_BGL = False
12071
12072   def ExpandNames(self):
12073     # These raise errors.OpPrereqError on their own:
12074     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12075     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12076
12077     # We want to lock all the affected nodes and groups. We have readily
12078     # available the list of nodes, and the *destination* group. To gather the
12079     # list of "source" groups, we need to fetch node information later on.
12080     self.needed_locks = {
12081       locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12082       locking.LEVEL_NODE: self.op.nodes,
12083       }
12084
12085   def DeclareLocks(self, level):
12086     if level == locking.LEVEL_NODEGROUP:
12087       assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12088
12089       # Try to get all affected nodes' groups without having the group or node
12090       # lock yet. Needs verification later in the code flow.
12091       groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12092
12093       self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12094
12095   def CheckPrereq(self):
12096     """Check prerequisites.
12097
12098     """
12099     assert self.needed_locks[locking.LEVEL_NODEGROUP]
12100     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12101             frozenset(self.op.nodes))
12102
12103     expected_locks = (set([self.group_uuid]) |
12104                       self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12105     actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12106     if actual_locks != expected_locks:
12107       raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12108                                " current groups are '%s', used to be '%s'" %
12109                                (utils.CommaJoin(expected_locks),
12110                                 utils.CommaJoin(actual_locks)))
12111
12112     self.node_data = self.cfg.GetAllNodesInfo()
12113     self.group = self.cfg.GetNodeGroup(self.group_uuid)
12114     instance_data = self.cfg.GetAllInstancesInfo()
12115
12116     if self.group is None:
12117       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12118                                (self.op.group_name, self.group_uuid))
12119
12120     (new_splits, previous_splits) = \
12121       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12122                                              for node in self.op.nodes],
12123                                             self.node_data, instance_data)
12124
12125     if new_splits:
12126       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12127
12128       if not self.op.force:
12129         raise errors.OpExecError("The following instances get split by this"
12130                                  " change and --force was not given: %s" %
12131                                  fmt_new_splits)
12132       else:
12133         self.LogWarning("This operation will split the following instances: %s",
12134                         fmt_new_splits)
12135
12136         if previous_splits:
12137           self.LogWarning("In addition, these already-split instances continue"
12138                           " to be split across groups: %s",
12139                           utils.CommaJoin(utils.NiceSort(previous_splits)))
12140
12141   def Exec(self, feedback_fn):
12142     """Assign nodes to a new group.
12143
12144     """
12145     for node in self.op.nodes:
12146       self.node_data[node].group = self.group_uuid
12147
12148     # FIXME: Depends on side-effects of modifying the result of
12149     # C{cfg.GetAllNodesInfo}
12150
12151     self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12152
12153   @staticmethod
12154   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12155     """Check for split instances after a node assignment.
12156
12157     This method considers a series of node assignments as an atomic operation,
12158     and returns information about split instances after applying the set of
12159     changes.
12160
12161     In particular, it returns information about newly split instances, and
12162     instances that were already split, and remain so after the change.
12163
12164     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12165     considered.
12166
12167     @type changes: list of (node_name, new_group_uuid) pairs.
12168     @param changes: list of node assignments to consider.
12169     @param node_data: a dict with data for all nodes
12170     @param instance_data: a dict with all instances to consider
12171     @rtype: a two-tuple
12172     @return: a list of instances that were previously okay and result split as a
12173       consequence of this change, and a list of instances that were previously
12174       split and this change does not fix.
12175
12176     """
12177     changed_nodes = dict((node, group) for node, group in changes
12178                          if node_data[node].group != group)
12179
12180     all_split_instances = set()
12181     previously_split_instances = set()
12182
12183     def InstanceNodes(instance):
12184       return [instance.primary_node] + list(instance.secondary_nodes)
12185
12186     for inst in instance_data.values():
12187       if inst.disk_template not in constants.DTS_INT_MIRROR:
12188         continue
12189
12190       instance_nodes = InstanceNodes(inst)
12191
12192       if len(set(node_data[node].group for node in instance_nodes)) > 1:
12193         previously_split_instances.add(inst.name)
12194
12195       if len(set(changed_nodes.get(node, node_data[node].group)
12196                  for node in instance_nodes)) > 1:
12197         all_split_instances.add(inst.name)
12198
12199     return (list(all_split_instances - previously_split_instances),
12200             list(previously_split_instances & all_split_instances))
12201
12202
12203 class _GroupQuery(_QueryBase):
12204   FIELDS = query.GROUP_FIELDS
12205
12206   def ExpandNames(self, lu):
12207     lu.needed_locks = {}
12208
12209     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12210     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12211
12212     if not self.names:
12213       self.wanted = [name_to_uuid[name]
12214                      for name in utils.NiceSort(name_to_uuid.keys())]
12215     else:
12216       # Accept names to be either names or UUIDs.
12217       missing = []
12218       self.wanted = []
12219       all_uuid = frozenset(self._all_groups.keys())
12220
12221       for name in self.names:
12222         if name in all_uuid:
12223           self.wanted.append(name)
12224         elif name in name_to_uuid:
12225           self.wanted.append(name_to_uuid[name])
12226         else:
12227           missing.append(name)
12228
12229       if missing:
12230         raise errors.OpPrereqError("Some groups do not exist: %s" %
12231                                    utils.CommaJoin(missing),
12232                                    errors.ECODE_NOENT)
12233
12234   def DeclareLocks(self, lu, level):
12235     pass
12236
12237   def _GetQueryData(self, lu):
12238     """Computes the list of node groups and their attributes.
12239
12240     """
12241     do_nodes = query.GQ_NODE in self.requested_data
12242     do_instances = query.GQ_INST in self.requested_data
12243
12244     group_to_nodes = None
12245     group_to_instances = None
12246
12247     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12248     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12249     # latter GetAllInstancesInfo() is not enough, for we have to go through
12250     # instance->node. Hence, we will need to process nodes even if we only need
12251     # instance information.
12252     if do_nodes or do_instances:
12253       all_nodes = lu.cfg.GetAllNodesInfo()
12254       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12255       node_to_group = {}
12256
12257       for node in all_nodes.values():
12258         if node.group in group_to_nodes:
12259           group_to_nodes[node.group].append(node.name)
12260           node_to_group[node.name] = node.group
12261
12262       if do_instances:
12263         all_instances = lu.cfg.GetAllInstancesInfo()
12264         group_to_instances = dict((uuid, []) for uuid in self.wanted)
12265
12266         for instance in all_instances.values():
12267           node = instance.primary_node
12268           if node in node_to_group:
12269             group_to_instances[node_to_group[node]].append(instance.name)
12270
12271         if not do_nodes:
12272           # Do not pass on node information if it was not requested.
12273           group_to_nodes = None
12274
12275     return query.GroupQueryData([self._all_groups[uuid]
12276                                  for uuid in self.wanted],
12277                                 group_to_nodes, group_to_instances)
12278
12279
12280 class LUGroupQuery(NoHooksLU):
12281   """Logical unit for querying node groups.
12282
12283   """
12284   REQ_BGL = False
12285
12286   def CheckArguments(self):
12287     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12288                           self.op.output_fields, False)
12289
12290   def ExpandNames(self):
12291     self.gq.ExpandNames(self)
12292
12293   def DeclareLocks(self, level):
12294     self.gq.DeclareLocks(self, level)
12295
12296   def Exec(self, feedback_fn):
12297     return self.gq.OldStyleQuery(self)
12298
12299
12300 class LUGroupSetParams(LogicalUnit):
12301   """Modifies the parameters of a node group.
12302
12303   """
12304   HPATH = "group-modify"
12305   HTYPE = constants.HTYPE_GROUP
12306   REQ_BGL = False
12307
12308   def CheckArguments(self):
12309     all_changes = [
12310       self.op.ndparams,
12311       self.op.alloc_policy,
12312       ]
12313
12314     if all_changes.count(None) == len(all_changes):
12315       raise errors.OpPrereqError("Please pass at least one modification",
12316                                  errors.ECODE_INVAL)
12317
12318   def ExpandNames(self):
12319     # This raises errors.OpPrereqError on its own:
12320     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12321
12322     self.needed_locks = {
12323       locking.LEVEL_NODEGROUP: [self.group_uuid],
12324       }
12325
12326   def CheckPrereq(self):
12327     """Check prerequisites.
12328
12329     """
12330     self.group = self.cfg.GetNodeGroup(self.group_uuid)
12331
12332     if self.group is None:
12333       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12334                                (self.op.group_name, self.group_uuid))
12335
12336     if self.op.ndparams:
12337       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12338       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12339       self.new_ndparams = new_ndparams
12340
12341   def BuildHooksEnv(self):
12342     """Build hooks env.
12343
12344     """
12345     return {
12346       "GROUP_NAME": self.op.group_name,
12347       "NEW_ALLOC_POLICY": self.op.alloc_policy,
12348       }
12349
12350   def BuildHooksNodes(self):
12351     """Build hooks nodes.
12352
12353     """
12354     mn = self.cfg.GetMasterNode()
12355     return ([mn], [mn])
12356
12357   def Exec(self, feedback_fn):
12358     """Modifies the node group.
12359
12360     """
12361     result = []
12362
12363     if self.op.ndparams:
12364       self.group.ndparams = self.new_ndparams
12365       result.append(("ndparams", str(self.group.ndparams)))
12366
12367     if self.op.alloc_policy:
12368       self.group.alloc_policy = self.op.alloc_policy
12369
12370     self.cfg.Update(self.group, feedback_fn)
12371     return result
12372
12373
12374 class LUGroupRemove(LogicalUnit):
12375   HPATH = "group-remove"
12376   HTYPE = constants.HTYPE_GROUP
12377   REQ_BGL = False
12378
12379   def ExpandNames(self):
12380     # This will raises errors.OpPrereqError on its own:
12381     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12382     self.needed_locks = {
12383       locking.LEVEL_NODEGROUP: [self.group_uuid],
12384       }
12385
12386   def CheckPrereq(self):
12387     """Check prerequisites.
12388
12389     This checks that the given group name exists as a node group, that is
12390     empty (i.e., contains no nodes), and that is not the last group of the
12391     cluster.
12392
12393     """
12394     # Verify that the group is empty.
12395     group_nodes = [node.name
12396                    for node in self.cfg.GetAllNodesInfo().values()
12397                    if node.group == self.group_uuid]
12398
12399     if group_nodes:
12400       raise errors.OpPrereqError("Group '%s' not empty, has the following"
12401                                  " nodes: %s" %
12402                                  (self.op.group_name,
12403                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
12404                                  errors.ECODE_STATE)
12405
12406     # Verify the cluster would not be left group-less.
12407     if len(self.cfg.GetNodeGroupList()) == 1:
12408       raise errors.OpPrereqError("Group '%s' is the only group,"
12409                                  " cannot be removed" %
12410                                  self.op.group_name,
12411                                  errors.ECODE_STATE)
12412
12413   def BuildHooksEnv(self):
12414     """Build hooks env.
12415
12416     """
12417     return {
12418       "GROUP_NAME": self.op.group_name,
12419       }
12420
12421   def BuildHooksNodes(self):
12422     """Build hooks nodes.
12423
12424     """
12425     mn = self.cfg.GetMasterNode()
12426     return ([mn], [mn])
12427
12428   def Exec(self, feedback_fn):
12429     """Remove the node group.
12430
12431     """
12432     try:
12433       self.cfg.RemoveNodeGroup(self.group_uuid)
12434     except errors.ConfigurationError:
12435       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12436                                (self.op.group_name, self.group_uuid))
12437
12438     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12439
12440
12441 class LUGroupRename(LogicalUnit):
12442   HPATH = "group-rename"
12443   HTYPE = constants.HTYPE_GROUP
12444   REQ_BGL = False
12445
12446   def ExpandNames(self):
12447     # This raises errors.OpPrereqError on its own:
12448     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12449
12450     self.needed_locks = {
12451       locking.LEVEL_NODEGROUP: [self.group_uuid],
12452       }
12453
12454   def CheckPrereq(self):
12455     """Check prerequisites.
12456
12457     Ensures requested new name is not yet used.
12458
12459     """
12460     try:
12461       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12462     except errors.OpPrereqError:
12463       pass
12464     else:
12465       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12466                                  " node group (UUID: %s)" %
12467                                  (self.op.new_name, new_name_uuid),
12468                                  errors.ECODE_EXISTS)
12469
12470   def BuildHooksEnv(self):
12471     """Build hooks env.
12472
12473     """
12474     return {
12475       "OLD_NAME": self.op.group_name,
12476       "NEW_NAME": self.op.new_name,
12477       }
12478
12479   def BuildHooksNodes(self):
12480     """Build hooks nodes.
12481
12482     """
12483     mn = self.cfg.GetMasterNode()
12484
12485     all_nodes = self.cfg.GetAllNodesInfo()
12486     all_nodes.pop(mn, None)
12487
12488     run_nodes = [mn]
12489     run_nodes.extend(node.name for node in all_nodes.values()
12490                      if node.group == self.group_uuid)
12491
12492     return (run_nodes, run_nodes)
12493
12494   def Exec(self, feedback_fn):
12495     """Rename the node group.
12496
12497     """
12498     group = self.cfg.GetNodeGroup(self.group_uuid)
12499
12500     if group is None:
12501       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12502                                (self.op.group_name, self.group_uuid))
12503
12504     group.name = self.op.new_name
12505     self.cfg.Update(group, feedback_fn)
12506
12507     return self.op.new_name
12508
12509
12510 class LUGroupEvacuate(LogicalUnit):
12511   HPATH = "group-evacuate"
12512   HTYPE = constants.HTYPE_GROUP
12513   REQ_BGL = False
12514
12515   def ExpandNames(self):
12516     # This raises errors.OpPrereqError on its own:
12517     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12518
12519     if self.op.target_groups:
12520       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12521                                   self.op.target_groups)
12522     else:
12523       self.req_target_uuids = []
12524
12525     if self.group_uuid in self.req_target_uuids:
12526       raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12527                                  " as a target group (targets are %s)" %
12528                                  (self.group_uuid,
12529                                   utils.CommaJoin(self.req_target_uuids)),
12530                                  errors.ECODE_INVAL)
12531
12532     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12533
12534     self.share_locks = _ShareAll()
12535     self.needed_locks = {
12536       locking.LEVEL_INSTANCE: [],
12537       locking.LEVEL_NODEGROUP: [],
12538       locking.LEVEL_NODE: [],
12539       }
12540
12541   def DeclareLocks(self, level):
12542     if level == locking.LEVEL_INSTANCE:
12543       assert not self.needed_locks[locking.LEVEL_INSTANCE]
12544
12545       # Lock instances optimistically, needs verification once node and group
12546       # locks have been acquired
12547       self.needed_locks[locking.LEVEL_INSTANCE] = \
12548         self.cfg.GetNodeGroupInstances(self.group_uuid)
12549
12550     elif level == locking.LEVEL_NODEGROUP:
12551       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12552
12553       if self.req_target_uuids:
12554         lock_groups = set([self.group_uuid] + self.req_target_uuids)
12555
12556         # Lock all groups used by instances optimistically; this requires going
12557         # via the node before it's locked, requiring verification later on
12558         lock_groups.update(group_uuid
12559                            for instance_name in
12560                              self.owned_locks(locking.LEVEL_INSTANCE)
12561                            for group_uuid in
12562                              self.cfg.GetInstanceNodeGroups(instance_name))
12563       else:
12564         # No target groups, need to lock all of them
12565         lock_groups = locking.ALL_SET
12566
12567       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12568
12569     elif level == locking.LEVEL_NODE:
12570       # This will only lock the nodes in the group to be evacuated which
12571       # contain actual instances
12572       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12573       self._LockInstancesNodes()
12574
12575       # Lock all nodes in group to be evacuated and target groups
12576       owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12577       assert self.group_uuid in owned_groups
12578       member_nodes = [node_name
12579                       for group in owned_groups
12580                       for node_name in self.cfg.GetNodeGroup(group).members]
12581       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12582
12583   def CheckPrereq(self):
12584     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12585     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12586     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12587
12588     assert owned_groups.issuperset(self.req_target_uuids)
12589     assert self.group_uuid in owned_groups
12590
12591     # Check if locked instances are still correct
12592     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12593
12594     # Get instance information
12595     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12596
12597     # Check if node groups for locked instances are still correct
12598     for instance_name in owned_instances:
12599       inst = self.instances[instance_name]
12600       assert owned_nodes.issuperset(inst.all_nodes), \
12601         "Instance %s's nodes changed while we kept the lock" % instance_name
12602
12603       inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12604                                              owned_groups)
12605
12606       assert self.group_uuid in inst_groups, \
12607         "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12608
12609     if self.req_target_uuids:
12610       # User requested specific target groups
12611       self.target_uuids = self.req_target_uuids
12612     else:
12613       # All groups except the one to be evacuated are potential targets
12614       self.target_uuids = [group_uuid for group_uuid in owned_groups
12615                            if group_uuid != self.group_uuid]
12616
12617       if not self.target_uuids:
12618         raise errors.OpPrereqError("There are no possible target groups",
12619                                    errors.ECODE_INVAL)
12620
12621   def BuildHooksEnv(self):
12622     """Build hooks env.
12623
12624     """
12625     return {
12626       "GROUP_NAME": self.op.group_name,
12627       "TARGET_GROUPS": " ".join(self.target_uuids),
12628       }
12629
12630   def BuildHooksNodes(self):
12631     """Build hooks nodes.
12632
12633     """
12634     mn = self.cfg.GetMasterNode()
12635
12636     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12637
12638     run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12639
12640     return (run_nodes, run_nodes)
12641
12642   def Exec(self, feedback_fn):
12643     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12644
12645     assert self.group_uuid not in self.target_uuids
12646
12647     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12648                      instances=instances, target_groups=self.target_uuids)
12649
12650     ial.Run(self.op.iallocator)
12651
12652     if not ial.success:
12653       raise errors.OpPrereqError("Can't compute group evacuation using"
12654                                  " iallocator '%s': %s" %
12655                                  (self.op.iallocator, ial.info),
12656                                  errors.ECODE_NORES)
12657
12658     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12659
12660     self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12661                  len(jobs), self.op.group_name)
12662
12663     return ResultWithJobs(jobs)
12664
12665
12666 class TagsLU(NoHooksLU): # pylint: disable=W0223
12667   """Generic tags LU.
12668
12669   This is an abstract class which is the parent of all the other tags LUs.
12670
12671   """
12672   def ExpandNames(self):
12673     self.group_uuid = None
12674     self.needed_locks = {}
12675     if self.op.kind == constants.TAG_NODE:
12676       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12677       self.needed_locks[locking.LEVEL_NODE] = self.op.name
12678     elif self.op.kind == constants.TAG_INSTANCE:
12679       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12680       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12681     elif self.op.kind == constants.TAG_NODEGROUP:
12682       self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12683
12684     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12685     # not possible to acquire the BGL based on opcode parameters)
12686
12687   def CheckPrereq(self):
12688     """Check prerequisites.
12689
12690     """
12691     if self.op.kind == constants.TAG_CLUSTER:
12692       self.target = self.cfg.GetClusterInfo()
12693     elif self.op.kind == constants.TAG_NODE:
12694       self.target = self.cfg.GetNodeInfo(self.op.name)
12695     elif self.op.kind == constants.TAG_INSTANCE:
12696       self.target = self.cfg.GetInstanceInfo(self.op.name)
12697     elif self.op.kind == constants.TAG_NODEGROUP:
12698       self.target = self.cfg.GetNodeGroup(self.group_uuid)
12699     else:
12700       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12701                                  str(self.op.kind), errors.ECODE_INVAL)
12702
12703
12704 class LUTagsGet(TagsLU):
12705   """Returns the tags of a given object.
12706
12707   """
12708   REQ_BGL = False
12709
12710   def ExpandNames(self):
12711     TagsLU.ExpandNames(self)
12712
12713     # Share locks as this is only a read operation
12714     self.share_locks = _ShareAll()
12715
12716   def Exec(self, feedback_fn):
12717     """Returns the tag list.
12718
12719     """
12720     return list(self.target.GetTags())
12721
12722
12723 class LUTagsSearch(NoHooksLU):
12724   """Searches the tags for a given pattern.
12725
12726   """
12727   REQ_BGL = False
12728
12729   def ExpandNames(self):
12730     self.needed_locks = {}
12731
12732   def CheckPrereq(self):
12733     """Check prerequisites.
12734
12735     This checks the pattern passed for validity by compiling it.
12736
12737     """
12738     try:
12739       self.re = re.compile(self.op.pattern)
12740     except re.error, err:
12741       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12742                                  (self.op.pattern, err), errors.ECODE_INVAL)
12743
12744   def Exec(self, feedback_fn):
12745     """Returns the tag list.
12746
12747     """
12748     cfg = self.cfg
12749     tgts = [("/cluster", cfg.GetClusterInfo())]
12750     ilist = cfg.GetAllInstancesInfo().values()
12751     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12752     nlist = cfg.GetAllNodesInfo().values()
12753     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12754     tgts.extend(("/nodegroup/%s" % n.name, n)
12755                 for n in cfg.GetAllNodeGroupsInfo().values())
12756     results = []
12757     for path, target in tgts:
12758       for tag in target.GetTags():
12759         if self.re.search(tag):
12760           results.append((path, tag))
12761     return results
12762
12763
12764 class LUTagsSet(TagsLU):
12765   """Sets a tag on a given object.
12766
12767   """
12768   REQ_BGL = False
12769
12770   def CheckPrereq(self):
12771     """Check prerequisites.
12772
12773     This checks the type and length of the tag name and value.
12774
12775     """
12776     TagsLU.CheckPrereq(self)
12777     for tag in self.op.tags:
12778       objects.TaggableObject.ValidateTag(tag)
12779
12780   def Exec(self, feedback_fn):
12781     """Sets the tag.
12782
12783     """
12784     try:
12785       for tag in self.op.tags:
12786         self.target.AddTag(tag)
12787     except errors.TagError, err:
12788       raise errors.OpExecError("Error while setting tag: %s" % str(err))
12789     self.cfg.Update(self.target, feedback_fn)
12790
12791
12792 class LUTagsDel(TagsLU):
12793   """Delete a list of tags from a given object.
12794
12795   """
12796   REQ_BGL = False
12797
12798   def CheckPrereq(self):
12799     """Check prerequisites.
12800
12801     This checks that we have the given tag.
12802
12803     """
12804     TagsLU.CheckPrereq(self)
12805     for tag in self.op.tags:
12806       objects.TaggableObject.ValidateTag(tag)
12807     del_tags = frozenset(self.op.tags)
12808     cur_tags = self.target.GetTags()
12809
12810     diff_tags = del_tags - cur_tags
12811     if diff_tags:
12812       diff_names = ("'%s'" % i for i in sorted(diff_tags))
12813       raise errors.OpPrereqError("Tag(s) %s not found" %
12814                                  (utils.CommaJoin(diff_names), ),
12815                                  errors.ECODE_NOENT)
12816
12817   def Exec(self, feedback_fn):
12818     """Remove the tag from the object.
12819
12820     """
12821     for tag in self.op.tags:
12822       self.target.RemoveTag(tag)
12823     self.cfg.Update(self.target, feedback_fn)
12824
12825
12826 class LUTestDelay(NoHooksLU):
12827   """Sleep for a specified amount of time.
12828
12829   This LU sleeps on the master and/or nodes for a specified amount of
12830   time.
12831
12832   """
12833   REQ_BGL = False
12834
12835   def ExpandNames(self):
12836     """Expand names and set required locks.
12837
12838     This expands the node list, if any.
12839
12840     """
12841     self.needed_locks = {}
12842     if self.op.on_nodes:
12843       # _GetWantedNodes can be used here, but is not always appropriate to use
12844       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12845       # more information.
12846       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12847       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12848
12849   def _TestDelay(self):
12850     """Do the actual sleep.
12851
12852     """
12853     if self.op.on_master:
12854       if not utils.TestDelay(self.op.duration):
12855         raise errors.OpExecError("Error during master delay test")
12856     if self.op.on_nodes:
12857       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12858       for node, node_result in result.items():
12859         node_result.Raise("Failure during rpc call to node %s" % node)
12860
12861   def Exec(self, feedback_fn):
12862     """Execute the test delay opcode, with the wanted repetitions.
12863
12864     """
12865     if self.op.repeat == 0:
12866       self._TestDelay()
12867     else:
12868       top_value = self.op.repeat - 1
12869       for i in range(self.op.repeat):
12870         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12871         self._TestDelay()
12872
12873
12874 class LUTestJqueue(NoHooksLU):
12875   """Utility LU to test some aspects of the job queue.
12876
12877   """
12878   REQ_BGL = False
12879
12880   # Must be lower than default timeout for WaitForJobChange to see whether it
12881   # notices changed jobs
12882   _CLIENT_CONNECT_TIMEOUT = 20.0
12883   _CLIENT_CONFIRM_TIMEOUT = 60.0
12884
12885   @classmethod
12886   def _NotifyUsingSocket(cls, cb, errcls):
12887     """Opens a Unix socket and waits for another program to connect.
12888
12889     @type cb: callable
12890     @param cb: Callback to send socket name to client
12891     @type errcls: class
12892     @param errcls: Exception class to use for errors
12893
12894     """
12895     # Using a temporary directory as there's no easy way to create temporary
12896     # sockets without writing a custom loop around tempfile.mktemp and
12897     # socket.bind
12898     tmpdir = tempfile.mkdtemp()
12899     try:
12900       tmpsock = utils.PathJoin(tmpdir, "sock")
12901
12902       logging.debug("Creating temporary socket at %s", tmpsock)
12903       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12904       try:
12905         sock.bind(tmpsock)
12906         sock.listen(1)
12907
12908         # Send details to client
12909         cb(tmpsock)
12910
12911         # Wait for client to connect before continuing
12912         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12913         try:
12914           (conn, _) = sock.accept()
12915         except socket.error, err:
12916           raise errcls("Client didn't connect in time (%s)" % err)
12917       finally:
12918         sock.close()
12919     finally:
12920       # Remove as soon as client is connected
12921       shutil.rmtree(tmpdir)
12922
12923     # Wait for client to close
12924     try:
12925       try:
12926         # pylint: disable=E1101
12927         # Instance of '_socketobject' has no ... member
12928         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12929         conn.recv(1)
12930       except socket.error, err:
12931         raise errcls("Client failed to confirm notification (%s)" % err)
12932     finally:
12933       conn.close()
12934
12935   def _SendNotification(self, test, arg, sockname):
12936     """Sends a notification to the client.
12937
12938     @type test: string
12939     @param test: Test name
12940     @param arg: Test argument (depends on test)
12941     @type sockname: string
12942     @param sockname: Socket path
12943
12944     """
12945     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12946
12947   def _Notify(self, prereq, test, arg):
12948     """Notifies the client of a test.
12949
12950     @type prereq: bool
12951     @param prereq: Whether this is a prereq-phase test
12952     @type test: string
12953     @param test: Test name
12954     @param arg: Test argument (depends on test)
12955
12956     """
12957     if prereq:
12958       errcls = errors.OpPrereqError
12959     else:
12960       errcls = errors.OpExecError
12961
12962     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12963                                                   test, arg),
12964                                    errcls)
12965
12966   def CheckArguments(self):
12967     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12968     self.expandnames_calls = 0
12969
12970   def ExpandNames(self):
12971     checkargs_calls = getattr(self, "checkargs_calls", 0)
12972     if checkargs_calls < 1:
12973       raise errors.ProgrammerError("CheckArguments was not called")
12974
12975     self.expandnames_calls += 1
12976
12977     if self.op.notify_waitlock:
12978       self._Notify(True, constants.JQT_EXPANDNAMES, None)
12979
12980     self.LogInfo("Expanding names")
12981
12982     # Get lock on master node (just to get a lock, not for a particular reason)
12983     self.needed_locks = {
12984       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12985       }
12986
12987   def Exec(self, feedback_fn):
12988     if self.expandnames_calls < 1:
12989       raise errors.ProgrammerError("ExpandNames was not called")
12990
12991     if self.op.notify_exec:
12992       self._Notify(False, constants.JQT_EXEC, None)
12993
12994     self.LogInfo("Executing")
12995
12996     if self.op.log_messages:
12997       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12998       for idx, msg in enumerate(self.op.log_messages):
12999         self.LogInfo("Sending log message %s", idx + 1)
13000         feedback_fn(constants.JQT_MSGPREFIX + msg)
13001         # Report how many test messages have been sent
13002         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13003
13004     if self.op.fail:
13005       raise errors.OpExecError("Opcode failure was requested")
13006
13007     return True
13008
13009
13010 class IAllocator(object):
13011   """IAllocator framework.
13012
13013   An IAllocator instance has three sets of attributes:
13014     - cfg that is needed to query the cluster
13015     - input data (all members of the _KEYS class attribute are required)
13016     - four buffer attributes (in|out_data|text), that represent the
13017       input (to the external script) in text and data structure format,
13018       and the output from it, again in two formats
13019     - the result variables from the script (success, info, nodes) for
13020       easy usage
13021
13022   """
13023   # pylint: disable=R0902
13024   # lots of instance attributes
13025
13026   def __init__(self, cfg, rpc, mode, **kwargs):
13027     self.cfg = cfg
13028     self.rpc = rpc
13029     # init buffer variables
13030     self.in_text = self.out_text = self.in_data = self.out_data = None
13031     # init all input fields so that pylint is happy
13032     self.mode = mode
13033     self.memory = self.disks = self.disk_template = None
13034     self.os = self.tags = self.nics = self.vcpus = None
13035     self.hypervisor = None
13036     self.relocate_from = None
13037     self.name = None
13038     self.instances = None
13039     self.evac_mode = None
13040     self.target_groups = []
13041     # computed fields
13042     self.required_nodes = None
13043     # init result fields
13044     self.success = self.info = self.result = None
13045
13046     try:
13047       (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13048     except KeyError:
13049       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13050                                    " IAllocator" % self.mode)
13051
13052     keyset = [n for (n, _) in keydata]
13053
13054     for key in kwargs:
13055       if key not in keyset:
13056         raise errors.ProgrammerError("Invalid input parameter '%s' to"
13057                                      " IAllocator" % key)
13058       setattr(self, key, kwargs[key])
13059
13060     for key in keyset:
13061       if key not in kwargs:
13062         raise errors.ProgrammerError("Missing input parameter '%s' to"
13063                                      " IAllocator" % key)
13064     self._BuildInputData(compat.partial(fn, self), keydata)
13065
13066   def _ComputeClusterData(self):
13067     """Compute the generic allocator input data.
13068
13069     This is the data that is independent of the actual operation.
13070
13071     """
13072     cfg = self.cfg
13073     cluster_info = cfg.GetClusterInfo()
13074     # cluster data
13075     data = {
13076       "version": constants.IALLOCATOR_VERSION,
13077       "cluster_name": cfg.GetClusterName(),
13078       "cluster_tags": list(cluster_info.GetTags()),
13079       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13080       # we don't have job IDs
13081       }
13082     ninfo = cfg.GetAllNodesInfo()
13083     iinfo = cfg.GetAllInstancesInfo().values()
13084     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13085
13086     # node data
13087     node_list = [n.name for n in ninfo.values() if n.vm_capable]
13088
13089     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13090       hypervisor_name = self.hypervisor
13091     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13092       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13093     else:
13094       hypervisor_name = cluster_info.enabled_hypervisors[0]
13095
13096     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
13097                                         hypervisor_name)
13098     node_iinfo = \
13099       self.rpc.call_all_instances_info(node_list,
13100                                        cluster_info.enabled_hypervisors)
13101
13102     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13103
13104     config_ndata = self._ComputeBasicNodeData(ninfo)
13105     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13106                                                  i_list, config_ndata)
13107     assert len(data["nodes"]) == len(ninfo), \
13108         "Incomplete node data computed"
13109
13110     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13111
13112     self.in_data = data
13113
13114   @staticmethod
13115   def _ComputeNodeGroupData(cfg):
13116     """Compute node groups data.
13117
13118     """
13119     ng = dict((guuid, {
13120       "name": gdata.name,
13121       "alloc_policy": gdata.alloc_policy,
13122       })
13123       for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13124
13125     return ng
13126
13127   @staticmethod
13128   def _ComputeBasicNodeData(node_cfg):
13129     """Compute global node data.
13130
13131     @rtype: dict
13132     @returns: a dict of name: (node dict, node config)
13133
13134     """
13135     # fill in static (config-based) values
13136     node_results = dict((ninfo.name, {
13137       "tags": list(ninfo.GetTags()),
13138       "primary_ip": ninfo.primary_ip,
13139       "secondary_ip": ninfo.secondary_ip,
13140       "offline": ninfo.offline,
13141       "drained": ninfo.drained,
13142       "master_candidate": ninfo.master_candidate,
13143       "group": ninfo.group,
13144       "master_capable": ninfo.master_capable,
13145       "vm_capable": ninfo.vm_capable,
13146       })
13147       for ninfo in node_cfg.values())
13148
13149     return node_results
13150
13151   @staticmethod
13152   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13153                               node_results):
13154     """Compute global node data.
13155
13156     @param node_results: the basic node structures as filled from the config
13157
13158     """
13159     # make a copy of the current dict
13160     node_results = dict(node_results)
13161     for nname, nresult in node_data.items():
13162       assert nname in node_results, "Missing basic data for node %s" % nname
13163       ninfo = node_cfg[nname]
13164
13165       if not (ninfo.offline or ninfo.drained):
13166         nresult.Raise("Can't get data for node %s" % nname)
13167         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13168                                 nname)
13169         remote_info = nresult.payload
13170
13171         for attr in ["memory_total", "memory_free", "memory_dom0",
13172                      "vg_size", "vg_free", "cpu_total"]:
13173           if attr not in remote_info:
13174             raise errors.OpExecError("Node '%s' didn't return attribute"
13175                                      " '%s'" % (nname, attr))
13176           if not isinstance(remote_info[attr], int):
13177             raise errors.OpExecError("Node '%s' returned invalid value"
13178                                      " for '%s': %s" %
13179                                      (nname, attr, remote_info[attr]))
13180         # compute memory used by primary instances
13181         i_p_mem = i_p_up_mem = 0
13182         for iinfo, beinfo in i_list:
13183           if iinfo.primary_node == nname:
13184             i_p_mem += beinfo[constants.BE_MEMORY]
13185             if iinfo.name not in node_iinfo[nname].payload:
13186               i_used_mem = 0
13187             else:
13188               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13189             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13190             remote_info["memory_free"] -= max(0, i_mem_diff)
13191
13192             if iinfo.admin_up:
13193               i_p_up_mem += beinfo[constants.BE_MEMORY]
13194
13195         # compute memory used by instances
13196         pnr_dyn = {
13197           "total_memory": remote_info["memory_total"],
13198           "reserved_memory": remote_info["memory_dom0"],
13199           "free_memory": remote_info["memory_free"],
13200           "total_disk": remote_info["vg_size"],
13201           "free_disk": remote_info["vg_free"],
13202           "total_cpus": remote_info["cpu_total"],
13203           "i_pri_memory": i_p_mem,
13204           "i_pri_up_memory": i_p_up_mem,
13205           }
13206         pnr_dyn.update(node_results[nname])
13207         node_results[nname] = pnr_dyn
13208
13209     return node_results
13210
13211   @staticmethod
13212   def _ComputeInstanceData(cluster_info, i_list):
13213     """Compute global instance data.
13214
13215     """
13216     instance_data = {}
13217     for iinfo, beinfo in i_list:
13218       nic_data = []
13219       for nic in iinfo.nics:
13220         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13221         nic_dict = {
13222           "mac": nic.mac,
13223           "ip": nic.ip,
13224           "mode": filled_params[constants.NIC_MODE],
13225           "link": filled_params[constants.NIC_LINK],
13226           }
13227         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13228           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13229         nic_data.append(nic_dict)
13230       pir = {
13231         "tags": list(iinfo.GetTags()),
13232         "admin_up": iinfo.admin_up,
13233         "vcpus": beinfo[constants.BE_VCPUS],
13234         "memory": beinfo[constants.BE_MEMORY],
13235         "os": iinfo.os,
13236         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13237         "nics": nic_data,
13238         "disks": [{constants.IDISK_SIZE: dsk.size,
13239                    constants.IDISK_MODE: dsk.mode}
13240                   for dsk in iinfo.disks],
13241         "disk_template": iinfo.disk_template,
13242         "hypervisor": iinfo.hypervisor,
13243         }
13244       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13245                                                  pir["disks"])
13246       instance_data[iinfo.name] = pir
13247
13248     return instance_data
13249
13250   def _AddNewInstance(self):
13251     """Add new instance data to allocator structure.
13252
13253     This in combination with _AllocatorGetClusterData will create the
13254     correct structure needed as input for the allocator.
13255
13256     The checks for the completeness of the opcode must have already been
13257     done.
13258
13259     """
13260     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13261
13262     if self.disk_template in constants.DTS_INT_MIRROR:
13263       self.required_nodes = 2
13264     else:
13265       self.required_nodes = 1
13266
13267     request = {
13268       "name": self.name,
13269       "disk_template": self.disk_template,
13270       "tags": self.tags,
13271       "os": self.os,
13272       "vcpus": self.vcpus,
13273       "memory": self.memory,
13274       "disks": self.disks,
13275       "disk_space_total": disk_space,
13276       "nics": self.nics,
13277       "required_nodes": self.required_nodes,
13278       "hypervisor": self.hypervisor,
13279       }
13280
13281     return request
13282
13283   def _AddRelocateInstance(self):
13284     """Add relocate instance data to allocator structure.
13285
13286     This in combination with _IAllocatorGetClusterData will create the
13287     correct structure needed as input for the allocator.
13288
13289     The checks for the completeness of the opcode must have already been
13290     done.
13291
13292     """
13293     instance = self.cfg.GetInstanceInfo(self.name)
13294     if instance is None:
13295       raise errors.ProgrammerError("Unknown instance '%s' passed to"
13296                                    " IAllocator" % self.name)
13297
13298     if instance.disk_template not in constants.DTS_MIRRORED:
13299       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13300                                  errors.ECODE_INVAL)
13301
13302     if instance.disk_template in constants.DTS_INT_MIRROR and \
13303         len(instance.secondary_nodes) != 1:
13304       raise errors.OpPrereqError("Instance has not exactly one secondary node",
13305                                  errors.ECODE_STATE)
13306
13307     self.required_nodes = 1
13308     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13309     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13310
13311     request = {
13312       "name": self.name,
13313       "disk_space_total": disk_space,
13314       "required_nodes": self.required_nodes,
13315       "relocate_from": self.relocate_from,
13316       }
13317     return request
13318
13319   def _AddNodeEvacuate(self):
13320     """Get data for node-evacuate requests.
13321
13322     """
13323     return {
13324       "instances": self.instances,
13325       "evac_mode": self.evac_mode,
13326       }
13327
13328   def _AddChangeGroup(self):
13329     """Get data for node-evacuate requests.
13330
13331     """
13332     return {
13333       "instances": self.instances,
13334       "target_groups": self.target_groups,
13335       }
13336
13337   def _BuildInputData(self, fn, keydata):
13338     """Build input data structures.
13339
13340     """
13341     self._ComputeClusterData()
13342
13343     request = fn()
13344     request["type"] = self.mode
13345     for keyname, keytype in keydata:
13346       if keyname not in request:
13347         raise errors.ProgrammerError("Request parameter %s is missing" %
13348                                      keyname)
13349       val = request[keyname]
13350       if not keytype(val):
13351         raise errors.ProgrammerError("Request parameter %s doesn't pass"
13352                                      " validation, value %s, expected"
13353                                      " type %s" % (keyname, val, keytype))
13354     self.in_data["request"] = request
13355
13356     self.in_text = serializer.Dump(self.in_data)
13357
13358   _STRING_LIST = ht.TListOf(ht.TString)
13359   _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13360      # pylint: disable=E1101
13361      # Class '...' has no 'OP_ID' member
13362      "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13363                           opcodes.OpInstanceMigrate.OP_ID,
13364                           opcodes.OpInstanceReplaceDisks.OP_ID])
13365      })))
13366
13367   _NEVAC_MOVED = \
13368     ht.TListOf(ht.TAnd(ht.TIsLength(3),
13369                        ht.TItems([ht.TNonEmptyString,
13370                                   ht.TNonEmptyString,
13371                                   ht.TListOf(ht.TNonEmptyString),
13372                                  ])))
13373   _NEVAC_FAILED = \
13374     ht.TListOf(ht.TAnd(ht.TIsLength(2),
13375                        ht.TItems([ht.TNonEmptyString,
13376                                   ht.TMaybeString,
13377                                  ])))
13378   _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13379                           ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
13380
13381   _MODE_DATA = {
13382     constants.IALLOCATOR_MODE_ALLOC:
13383       (_AddNewInstance,
13384        [
13385         ("name", ht.TString),
13386         ("memory", ht.TInt),
13387         ("disks", ht.TListOf(ht.TDict)),
13388         ("disk_template", ht.TString),
13389         ("os", ht.TString),
13390         ("tags", _STRING_LIST),
13391         ("nics", ht.TListOf(ht.TDict)),
13392         ("vcpus", ht.TInt),
13393         ("hypervisor", ht.TString),
13394         ], ht.TList),
13395     constants.IALLOCATOR_MODE_RELOC:
13396       (_AddRelocateInstance,
13397        [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13398        ht.TList),
13399      constants.IALLOCATOR_MODE_NODE_EVAC:
13400       (_AddNodeEvacuate, [
13401         ("instances", _STRING_LIST),
13402         ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13403         ], _NEVAC_RESULT),
13404      constants.IALLOCATOR_MODE_CHG_GROUP:
13405       (_AddChangeGroup, [
13406         ("instances", _STRING_LIST),
13407         ("target_groups", _STRING_LIST),
13408         ], _NEVAC_RESULT),
13409     }
13410
13411   def Run(self, name, validate=True, call_fn=None):
13412     """Run an instance allocator and return the results.
13413
13414     """
13415     if call_fn is None:
13416       call_fn = self.rpc.call_iallocator_runner
13417
13418     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13419     result.Raise("Failure while running the iallocator script")
13420
13421     self.out_text = result.payload
13422     if validate:
13423       self._ValidateResult()
13424
13425   def _ValidateResult(self):
13426     """Process the allocator results.
13427
13428     This will process and if successful save the result in
13429     self.out_data and the other parameters.
13430
13431     """
13432     try:
13433       rdict = serializer.Load(self.out_text)
13434     except Exception, err:
13435       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13436
13437     if not isinstance(rdict, dict):
13438       raise errors.OpExecError("Can't parse iallocator results: not a dict")
13439
13440     # TODO: remove backwards compatiblity in later versions
13441     if "nodes" in rdict and "result" not in rdict:
13442       rdict["result"] = rdict["nodes"]
13443       del rdict["nodes"]
13444
13445     for key in "success", "info", "result":
13446       if key not in rdict:
13447         raise errors.OpExecError("Can't parse iallocator results:"
13448                                  " missing key '%s'" % key)
13449       setattr(self, key, rdict[key])
13450
13451     if not self._result_check(self.result):
13452       raise errors.OpExecError("Iallocator returned invalid result,"
13453                                " expected %s, got %s" %
13454                                (self._result_check, self.result),
13455                                errors.ECODE_INVAL)
13456
13457     if self.mode == constants.IALLOCATOR_MODE_RELOC:
13458       assert self.relocate_from is not None
13459       assert self.required_nodes == 1
13460
13461       node2group = dict((name, ndata["group"])
13462                         for (name, ndata) in self.in_data["nodes"].items())
13463
13464       fn = compat.partial(self._NodesToGroups, node2group,
13465                           self.in_data["nodegroups"])
13466
13467       instance = self.cfg.GetInstanceInfo(self.name)
13468       request_groups = fn(self.relocate_from + [instance.primary_node])
13469       result_groups = fn(rdict["result"] + [instance.primary_node])
13470
13471       if self.success and not set(result_groups).issubset(request_groups):
13472         raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13473                                  " differ from original groups (%s)" %
13474                                  (utils.CommaJoin(result_groups),
13475                                   utils.CommaJoin(request_groups)))
13476
13477     elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13478       assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13479
13480     self.out_data = rdict
13481
13482   @staticmethod
13483   def _NodesToGroups(node2group, groups, nodes):
13484     """Returns a list of unique group names for a list of nodes.
13485
13486     @type node2group: dict
13487     @param node2group: Map from node name to group UUID
13488     @type groups: dict
13489     @param groups: Group information
13490     @type nodes: list
13491     @param nodes: Node names
13492
13493     """
13494     result = set()
13495
13496     for node in nodes:
13497       try:
13498         group_uuid = node2group[node]
13499       except KeyError:
13500         # Ignore unknown node
13501         pass
13502       else:
13503         try:
13504           group = groups[group_uuid]
13505         except KeyError:
13506           # Can't find group, let's use UUID
13507           group_name = group_uuid
13508         else:
13509           group_name = group["name"]
13510
13511         result.add(group_name)
13512
13513     return sorted(result)
13514
13515
13516 class LUTestAllocator(NoHooksLU):
13517   """Run allocator tests.
13518
13519   This LU runs the allocator tests
13520
13521   """
13522   def CheckPrereq(self):
13523     """Check prerequisites.
13524
13525     This checks the opcode parameters depending on the director and mode test.
13526
13527     """
13528     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13529       for attr in ["memory", "disks", "disk_template",
13530                    "os", "tags", "nics", "vcpus"]:
13531         if not hasattr(self.op, attr):
13532           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13533                                      attr, errors.ECODE_INVAL)
13534       iname = self.cfg.ExpandInstanceName(self.op.name)
13535       if iname is not None:
13536         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13537                                    iname, errors.ECODE_EXISTS)
13538       if not isinstance(self.op.nics, list):
13539         raise errors.OpPrereqError("Invalid parameter 'nics'",
13540                                    errors.ECODE_INVAL)
13541       if not isinstance(self.op.disks, list):
13542         raise errors.OpPrereqError("Invalid parameter 'disks'",
13543                                    errors.ECODE_INVAL)
13544       for row in self.op.disks:
13545         if (not isinstance(row, dict) or
13546             constants.IDISK_SIZE not in row or
13547             not isinstance(row[constants.IDISK_SIZE], int) or
13548             constants.IDISK_MODE not in row or
13549             row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13550           raise errors.OpPrereqError("Invalid contents of the 'disks'"
13551                                      " parameter", errors.ECODE_INVAL)
13552       if self.op.hypervisor is None:
13553         self.op.hypervisor = self.cfg.GetHypervisorType()
13554     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13555       fname = _ExpandInstanceName(self.cfg, self.op.name)
13556       self.op.name = fname
13557       self.relocate_from = \
13558           list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
13559     elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
13560                           constants.IALLOCATOR_MODE_NODE_EVAC):
13561       if not self.op.instances:
13562         raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
13563       self.op.instances = _GetWantedInstances(self, self.op.instances)
13564     else:
13565       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
13566                                  self.op.mode, errors.ECODE_INVAL)
13567
13568     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13569       if self.op.allocator is None:
13570         raise errors.OpPrereqError("Missing allocator name",
13571                                    errors.ECODE_INVAL)
13572     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13573       raise errors.OpPrereqError("Wrong allocator test '%s'" %
13574                                  self.op.direction, errors.ECODE_INVAL)
13575
13576   def Exec(self, feedback_fn):
13577     """Run the allocator test.
13578
13579     """
13580     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13581       ial = IAllocator(self.cfg, self.rpc,
13582                        mode=self.op.mode,
13583                        name=self.op.name,
13584                        memory=self.op.memory,
13585                        disks=self.op.disks,
13586                        disk_template=self.op.disk_template,
13587                        os=self.op.os,
13588                        tags=self.op.tags,
13589                        nics=self.op.nics,
13590                        vcpus=self.op.vcpus,
13591                        hypervisor=self.op.hypervisor,
13592                        )
13593     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13594       ial = IAllocator(self.cfg, self.rpc,
13595                        mode=self.op.mode,
13596                        name=self.op.name,
13597                        relocate_from=list(self.relocate_from),
13598                        )
13599     elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13600       ial = IAllocator(self.cfg, self.rpc,
13601                        mode=self.op.mode,
13602                        instances=self.op.instances,
13603                        target_groups=self.op.target_groups)
13604     elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13605       ial = IAllocator(self.cfg, self.rpc,
13606                        mode=self.op.mode,
13607                        instances=self.op.instances,
13608                        evac_mode=self.op.evac_mode)
13609     else:
13610       raise errors.ProgrammerError("Uncatched mode %s in"
13611                                    " LUTestAllocator.Exec", self.op.mode)
13612
13613     if self.op.direction == constants.IALLOCATOR_DIR_IN:
13614       result = ial.in_text
13615     else:
13616       ial.Run(self.op.allocator, validate=False)
13617       result = ial.out_text
13618     return result
13619
13620
13621 #: Query type implementations
13622 _QUERY_IMPL = {
13623   constants.QR_INSTANCE: _InstanceQuery,
13624   constants.QR_NODE: _NodeQuery,
13625   constants.QR_GROUP: _GroupQuery,
13626   constants.QR_OS: _OsQuery,
13627   }
13628
13629 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
13630
13631
13632 def _GetQueryImplementation(name):
13633   """Returns the implemtnation for a query type.
13634
13635   @param name: Query type, must be one of L{constants.QR_VIA_OP}
13636
13637   """
13638   try:
13639     return _QUERY_IMPL[name]
13640   except KeyError:
13641     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
13642                                errors.ECODE_INVAL)